def test_loc_scalar(self):
    """Check scalar ``.loc`` reads and writes on the frame in ``self.df``.

    Covers selecting and assigning an existing label, then the errors raised
    when the label ``"d"`` (not among the index categories) or the integer
    ``1`` is used.
    """
    result = self.df.loc["a"]
    expected = DataFrame(
        {"A": [0, 1, 5], "B": (Series(list("aaa")).astype(CDT(list("cab"))))}
    ).set_index("B")
    tm.assert_frame_equal(result, expected)

    df = self.df.copy()
    df.loc["a"] = 20
    expected = DataFrame(
        {
            "A": [20, 20, 2, 3, 4, 20],
            "B": (Series(list("aabbca")).astype(CDT(list("cab")))),
        }
    ).set_index("B")
    tm.assert_frame_equal(df, expected)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        df.loc["d"]

    msg = "cannot append a non-category item to a CategoricalIndex"
    with pytest.raises(TypeError, match=msg):
        df.loc["d"] = 10

    # The original assigned a "cannot insert an item ..." message here and
    # overwrote it on the next statement; only the message actually matched
    # against is kept.
    msg = "'fill_value=d' is not present in this Categorical's categories"
    with pytest.raises(ValueError, match=msg):
        df.loc["d", "A"] = 10
    with pytest.raises(ValueError, match=msg):
        df.loc["d", "C"] = 10

    with pytest.raises(KeyError, match="^1$"):
        df.loc[1]
def setup_method(self, method):
    """Create the four categorical-indexed frames stored on ``self``.

    Each frame has an int64 column ``A`` holding 0..5 and is indexed by a
    categorical column ``B``; the frames differ in index labels, categories
    and orderedness.
    """
    def _build(labels, dtype):
        # One frame: fixed 'A' column, categorical 'B' promoted to the index.
        return DataFrame({
            'A': np.arange(6, dtype='int64'),
            'B': Series(labels).astype(dtype),
        }).set_index('B')

    letters = list('aabbca')
    numbers = [1, 1, 2, 1, 3, 2]

    self.df = _build(letters, CDT(list('cab')))
    self.df2 = _build(letters, CDT(list('cabe')))
    self.df3 = _build(numbers, CDT([3, 2, 1], ordered=True))
    self.df4 = _build(numbers, CDT([3, 2, 1], ordered=False))
def test_loc_scalar(self):
    """Exercise scalar label access and assignment through ``.loc``.

    Reads the frame stored on ``self.df``, checks selection and assignment
    for the existing label ``'a'``, then checks the errors raised for the
    label ``'d'``, which is not in the categories.
    """
    cat_dtype = CDT(list('cab'))

    picked = self.df.loc['a']
    wanted = DataFrame({
        'A': [0, 1, 5],
        'B': Series(list('aaa')).astype(cat_dtype),
    }).set_index('B')
    assert_frame_equal(picked, wanted)

    frame = self.df.copy()
    frame.loc['a'] = 20
    wanted = DataFrame({
        'A': [20, 20, 2, 3, 4, 20],
        'B': Series(list('aabbca')).astype(cat_dtype),
    }).set_index('B')
    assert_frame_equal(frame, wanted)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        frame.loc['d']

    append_msg = "cannot append a non-category item to a CategoricalIndex"
    with pytest.raises(TypeError, match=append_msg):
        frame.loc['d'] = 10

    insert_msg = ("cannot insert an item into a CategoricalIndex that is not"
                  " already an existing category")
    for column in ('A', 'C'):
        with pytest.raises(TypeError, match=insert_msg):
            frame.loc['d', column] = 10
def test_loc_scalar(self):
    """Check scalar ``.loc`` reads and writes on the frame in ``self.df``.

    Covers selecting and assigning the existing label ``'a'`` and the
    exceptions raised when the label ``'d'`` (not in the categories) is
    read or assigned.
    """
    result = self.df.loc['a']
    expected = (DataFrame({
        'A': [0, 1, 5],
        'B': (Series(list('aaa')).astype(CDT(list('cab'))))
    }).set_index('B'))
    assert_frame_equal(result, expected)

    df = self.df.copy()
    df.loc['a'] = 20
    expected = (DataFrame({
        'A': [20, 20, 2, 3, 4, 20],
        'B': (Series(list('aabbca')).astype(CDT(list('cab'))))
    }).set_index('B'))
    assert_frame_equal(df, expected)

    # value not in the categories
    # Context managers replace the callable form, which needed a helper
    # ``f`` redefined before every check.
    with pytest.raises(KeyError):
        df.loc['d']

    with pytest.raises(TypeError):
        df.loc['d'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'A'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'C'] = 10
def test_loc_scalar(self):
    """Exercise scalar ``.loc`` access and assignment on ``self.df``.

    Checks selection and assignment for the existing label ``"a"``, that
    assigning a whole row under the new label ``"d"`` matches the same
    assignment on an object-indexed copy, and the errors raised for
    ``("d", column)`` assignment and for integer lookup.
    """
    cat_dtype = CDT(list("cab"))

    picked = self.df.loc["a"]
    wanted = DataFrame(
        {"A": [0, 1, 5], "B": Series(list("aaa")).astype(cat_dtype)}
    ).set_index("B")
    tm.assert_frame_equal(picked, wanted)

    frame = self.df.copy()
    frame.loc["a"] = 20
    wanted = DataFrame(
        {
            "A": [20, 20, 2, 3, 4, 20],
            "B": Series(list("aabbca")).astype(cat_dtype),
        }
    ).set_index("B")
    tm.assert_frame_equal(frame, wanted)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        frame.loc["d"]

    # Row assignment under a new label: compare against the same assignment
    # made on a copy whose index was first cast to object.
    enlarged = frame.copy()
    wanted = enlarged.copy()
    wanted.index = wanted.index.astype(object)
    wanted.loc["d"] = 10
    enlarged.loc["d"] = 10
    tm.assert_frame_equal(enlarged, wanted)

    msg = "'fill_value=d' is not present in this Categorical's categories"
    for column in ("A", "C"):
        with pytest.raises(TypeError, match=msg):
            frame.loc["d", column] = 10

    with pytest.raises(KeyError, match="^1$"):
        frame.loc[1]
def setup_method(self, method):
    """Attach four categorical-indexed frames (``df`` .. ``df4``) to ``self``.

    Every frame carries an int64 column ``A`` with the values 0..5 and uses a
    categorical column ``B`` as its index.
    """
    letters = list("aabbca")
    numbers = [1, 1, 2, 1, 3, 2]

    # attribute name -> (index labels, categorical dtype)
    specs = {
        "df": (letters, CDT(list("cab"))),
        "df2": (letters, CDT(list("cabe"))),
        "df3": (numbers, CDT([3, 2, 1], ordered=True)),
        "df4": (numbers, CDT([3, 2, 1], ordered=False)),
    }

    for attr, (labels, dtype) in specs.items():
        frame = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(labels).astype(dtype),
            }
        )
        setattr(self, attr, frame.set_index("B"))
def setup_method(self, method):
    """Attach two frames indexed by a ``CategoricalIndex`` named ``B``.

    ``self.df`` uses the categories ``c, a, b``; ``self.df2`` adds ``e``.
    Both hold an int64 column ``A`` with the values 0..5.
    """
    labels = list("aabbca")

    def _frame(categories):
        index = CategoricalIndex(labels, dtype=CDT(list(categories)), name="B")
        return DataFrame({"A": np.arange(6, dtype="int64")}, index=index)

    self.df = _frame("cab")
    self.df2 = _frame("cabe")
def test_series_retbins(self): # GH 8589 s = Series(np.arange(4)) result, bins = cut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( [-0.003, 1.5, 3], closed='right').repeat(2)).astype( CDT(ordered=True)) tm.assert_series_equal(result, expected) result, bins = qcut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( [-0.001, 1.5, 3], closed='right').repeat(2)).astype( CDT(ordered=True)) tm.assert_series_equal(result, expected)
def test_datetime_tz_cut(bins, box):
    """Check ``cut`` on a tz-aware datetime Series.

    ``bins`` is used as-is when it is an int; any other value is first
    passed through ``box``.
    """
    # see gh-19872
    tz = "US/Eastern"
    ser = Series(date_range("20130101", periods=3, tz=tz))

    if not isinstance(bins, int):
        bins = box(bins)
    result = cut(ser, bins)

    # Consecutive edges of the three expected intervals.
    edges = [
        Timestamp("2012-12-31 23:57:07.200000", tz=tz),
        Timestamp("2013-01-01 16:00:00", tz=tz),
        Timestamp("2013-01-02 08:00:00", tz=tz),
        Timestamp("2013-01-03 00:00:00", tz=tz),
    ]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_datetime_tz_qcut(bins):
    """Check ``qcut`` on a tz-aware datetime Series for the given ``bins``."""
    # see gh-19872
    tz = "US/Eastern"
    values = Series(date_range("20130101", periods=3, tz=tz))

    result = qcut(values, bins)

    # Consecutive edges of the three expected intervals.
    edges = [
        Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
        Timestamp("2013-01-01 16:00:00", tz=tz),
        Timestamp("2013-01-02 08:00:00", tz=tz),
        Timestamp("2013-01-03 00:00:00", tz=tz),
    ]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_loc_scalar(self):
    """Exercise scalar ``.loc`` access and assignment on ``self.df``.

    Checks selection and assignment for the existing label ``"a"``, the
    ``KeyError`` for the missing label ``"d"``, and that assigning a row
    under ``"d"`` matches the same assignment on an object-indexed copy.
    """
    dtype = CDT(list("cab"))

    def _cat_index(labels):
        # Categorical Series named "B"; the astype must keep the exact dtype.
        ser = Series(list(labels), name="B").astype(dtype)
        assert ser.dtype == dtype
        return Index(ser)

    picked = self.df.loc["a"]
    wanted = DataFrame({"A": [0, 1, 5]}, index=_cat_index("aaa"))
    tm.assert_frame_equal(picked, wanted)

    frame = self.df.copy()
    frame.loc["a"] = 20
    wanted = DataFrame(
        {"A": [20, 20, 2, 3, 4, 20]},
        index=_cat_index("aabbca"),
    )
    tm.assert_frame_equal(frame, wanted)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        frame.loc["d"]

    enlarged = frame.copy()
    wanted = enlarged.copy()
    wanted.index = wanted.index.astype(object)
    wanted.loc["d"] = 10
    enlarged.loc["d"] = 10
    tm.assert_frame_equal(enlarged, wanted)
def test_datetime_cut(self):
    """Check ``cut`` on datetime data supplied in several container types.

    The same three dates are passed as a Series, a list of
    ``np.datetime64``, an ndarray and a ``DatetimeIndex``; each must yield
    the same three intervals.
    """
    # GH 14714
    day_strings = ['2013-01-01', '2013-01-02', '2013-01-03']

    edges = [
        Timestamp('2012-12-31 23:57:07.200000'),
        Timestamp('2013-01-01 16:00:00'),
        Timestamp('2013-01-02 08:00:00'),
        Timestamp('2013-01-03 00:00:00'),
    ]
    intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))

    # testing for time data to be present as series
    result, _ = cut(to_datetime(Series(day_strings)), 3, retbins=True)
    tm.assert_series_equal(result, expected)

    # testing for time data to be present as list, ndarray and datetime index
    stamps = [np.datetime64(day) for day in day_strings]
    for data in (stamps, np.array(stamps), DatetimeIndex(day_strings)):
        result, _ = cut(data, 3, retbins=True)
        tm.assert_series_equal(Series(result), expected)
def test_series_ret_bins():
    """Check the Series returned by ``cut(..., retbins=True)``."""
    # see gh-8589
    values = Series(np.arange(4))
    result, _ = cut(values, 2, retbins=True)

    intervals = IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right")
    expected = Series(intervals.repeat(2)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_qcut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = qcut(s, [0, 0.333, 0.666, 1]) exp_levels = np.array([Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)]) exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype( CDT(ordered=True)) tm.assert_series_equal(res, exp)
def test_cut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) exp_bins = np.linspace(0, 8, num=4).round(3) exp_bins[0] -= 0.008 exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) tm.assert_series_equal(res, exp)
def df2():
    """Return a frame with int64 column ``A`` (0..5) and a categorical index.

    The index is a ``CategoricalIndex`` named ``B`` with labels ``aabbca``
    and categories ``c, a, b, e``.
    """
    index = CategoricalIndex(
        list("aabbca"),
        dtype=CDT(list("cabe")),
        name="B",
    )
    columns = {"A": np.arange(6, dtype="int64")}
    return DataFrame(columns, index=index)
def test_single_quantile(data, start, end, length, labels): # see gh-15431 ser = Series([data] * length) result = qcut(ser, 1, labels=labels) if labels is None: intervals = IntervalIndex([Interval(start, end)] * length, closed="right") expected = Series(intervals).astype(CDT(ordered=True)) else: expected = Series([0] * length) tm.assert_series_equal(result, expected)
def test_datetime_bin(conv):
    """Check ``cut`` on datetime data with bin edges converted by ``conv``."""
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]

    edges = [Timestamp(day) for day in bin_data]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))

    converted = [conv(day) for day in bin_data]
    result = Series(cut(data, bins=converted))
    tm.assert_series_equal(result, expected)
def test_loc_scalar(self):
    """Exercise scalar ``.loc`` access and assignment on ``self.df``.

    Checks selection and assignment for the existing label ``"a"``, then
    the errors raised for the label ``"d"`` (not in the categories) and for
    the integer label ``1``.
    """
    cat_dtype = CDT(list("cab"))

    picked = self.df.loc["a"]
    wanted = DataFrame(
        {"A": [0, 1, 5], "B": Series(list("aaa")).astype(cat_dtype)}
    ).set_index("B")
    tm.assert_frame_equal(picked, wanted)

    frame = self.df.copy()
    frame.loc["a"] = 20
    wanted = DataFrame(
        {
            "A": [20, 20, 2, 3, 4, 20],
            "B": Series(list("aabbca")).astype(cat_dtype),
        }
    ).set_index("B")
    tm.assert_frame_equal(frame, wanted)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        frame.loc["d"]

    append_msg = "cannot append a non-category item to a CategoricalIndex"
    with pytest.raises(TypeError, match=append_msg):
        frame.loc["d"] = 10

    insert_msg = (
        "cannot insert an item into a CategoricalIndex that is not "
        "already an existing category"
    )
    for column in ("A", "C"):
        with pytest.raises(TypeError, match=insert_msg):
            frame.loc["d", column] = 10

    label_msg = (
        r"cannot do label indexing on <class 'pandas\.core\.indexes\.category"
        r"\.CategoricalIndex'> with these indexers \[1\] of <class 'int'>"
    )
    with pytest.raises(TypeError, match=label_msg):
        frame.loc[1]
def test_single_quantile(self): # issue 15431 expected = Series([0, 0]) s = Series([9., 9.]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0), Interval(8.999, 9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9., -9.]) expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0), Interval(-9.001, -9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0., 0.]) expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0), Interval(-0.001, 0.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([9]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected)
def test_datetime_cut(data):
    """Check ``cut`` into three bins on the datetime collection ``data``."""
    # see gh-14714
    #
    # Testing time data when it comes in various collection types.
    binned, _ = cut(data, 3, retbins=True)

    edges = [
        Timestamp("2012-12-31 23:57:07.200000"),
        Timestamp("2013-01-01 16:00:00"),
        Timestamp("2013-01-02 08:00:00"),
        Timestamp("2013-01-03 00:00:00"),
    ]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(Series(binned), expected)
def test_cut_return_intervals():
    """Check that ``cut`` into three bins returns interval categories."""
    ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    result = cut(ser, 3)

    exp_bins = np.linspace(0, 8, num=4).round(3)
    # The expected left-most edge sits 0.008 below the data minimum.
    exp_bins[0] -= 0.008

    # ``IntervalIndex.from_breaks`` takes ``closed``; ``inclusive`` is not an
    # accepted keyword in released pandas and raises TypeError.
    expected = Series(
        IntervalIndex.from_breaks(exp_bins, closed="right").take(
            [0, 0, 0, 1, 1, 1, 2, 2, 2]
        )
    ).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_datetimetz_qcut(self, bins):
    """Check ``qcut`` on a tz-aware datetime Series for the given ``bins``."""
    # GH 19872
    tz = 'US/Eastern'
    values = Series(date_range('20130101', periods=3, tz=tz))

    result = qcut(values, bins)

    # Consecutive edges of the three expected intervals.
    edges = [
        Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
        Timestamp('2013-01-01 16:00:00', tz=tz),
        Timestamp('2013-01-02 08:00:00', tz=tz),
        Timestamp('2013-01-03 00:00:00', tz=tz),
    ]
    intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_datetimetz_cut(self, bins, box):
    """Check ``cut`` on a tz-aware datetime Series.

    ``bins`` is used as-is when it is an int; any other value is first
    passed through ``box``.
    """
    # GH 19872
    tz = 'US/Eastern'
    values = Series(date_range('20130101', periods=3, tz=tz))

    if not isinstance(bins, int):
        bins = box(bins)
    result = cut(values, bins)

    # Consecutive edges of the three expected intervals.
    edges = [
        Timestamp('2012-12-31 23:57:07.200000', tz=tz),
        Timestamp('2013-01-01 16:00:00', tz=tz),
        Timestamp('2013-01-02 08:00:00', tz=tz),
        Timestamp('2013-01-03 00:00:00', tz=tz),
    ]
    intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_datetime_bin(self):
    """Check ``cut`` on datetime data with bin edges given in several forms.

    The same three edges are supplied as ``Timestamp`` objects,
    ``np.datetime64`` values, ``datetime`` objects and the result of
    ``to_datetime``; each form must yield the same two intervals.
    """
    data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
    bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']

    # Built with the IntervalIndex constructor rather than the removed
    # ``IntervalIndex.from_intervals`` alternate constructor.
    expected = Series(
        IntervalIndex([
            Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
            Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])),
        ])
    ).astype(CDT(ordered=True))

    # The original list named ``Timestamp`` twice; one pass is enough.
    for conv in [Timestamp, np.datetime64]:
        bins = [conv(v) for v in bin_data]
        result = cut(data, bins=bins)
        tm.assert_series_equal(Series(result), expected)

    bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
    result = cut(data, bins=bin_pydatetime)
    tm.assert_series_equal(Series(result), expected)

    # Pass the ``to_datetime`` result itself; the original computed it but
    # called ``cut`` with ``bin_pydatetime`` again, so this form was never
    # exercised.
    bins = to_datetime(bin_data)
    result = cut(data, bins=bins)
    tm.assert_series_equal(Series(result), expected)
def test_reindexing(self):
    """Check ``DataFrame.reindex`` on a frame with a categorical index.

    Covers reindexing with plain lists and with ``Categorical`` targets,
    the error for the frame in ``self.df2``, and the ``reindex`` keyword
    arguments that raise ``NotImplementedError``.
    """
    df = DataFrame(
        {
            "A": np.arange(3, dtype="int64"),
            "B": Series(list("abc")).astype(CDT(list("cabe"))),
        }
    ).set_index("B")

    def _indexed(a_values, b_values):
        # Expected frame: column "A" indexed by "B".
        return DataFrame({"A": a_values, "B": b_values}).set_index("B")

    def _check(target, expected):
        tm.assert_frame_equal(df.reindex(target), expected, check_index_type=True)

    # reindexing
    # convert to a regular index
    plain_cases = [
        (["a", "b", "e"], [0, 1, np.nan]),
        (["a", "b"], [0, 1]),
        (["e"], [np.nan]),
        (["d"], [np.nan]),
    ]
    for labels, a_values in plain_cases:
        _check(labels, _indexed(a_values, Series(labels)))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list("cabe")

    _check(
        Categorical(["a", "e"], categories=cats),
        _indexed([0, np.nan], Series(list("ae")).astype(CDT(cats))),
    )
    _check(
        Categorical(["a"], categories=cats),
        _indexed([0], Series(list("a")).astype(CDT(cats))),
    )

    # The first three list cases are checked again after the Categorical ones.
    for labels, a_values in plain_cases[:3]:
        _check(labels, _indexed(a_values, Series(labels)))

    # give back the type of categorical that we received
    _check(
        Categorical(["a", "e"], categories=cats, ordered=True),
        _indexed([0, np.nan], Series(list("ae")).astype(CDT(cats, ordered=True))),
    )
    _check(
        Categorical(["a", "d"], categories=["a", "d"]),
        _indexed([0, np.nan], Series(list("ad")).astype(CDT(["a", "d"]))),
    )

    # passed duplicate indexers are not allowed
    msg = "cannot reindex from a duplicate axis"
    with pytest.raises(ValueError, match=msg):
        self.df2.reindex(["a", "b"])

    # args NotImplemented ATM
    msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
    unsupported = {"method": "ffill", "level": 1, "limit": 2}
    for name, value in unsupported.items():
        with pytest.raises(NotImplementedError, match=msg.format(name)):
            df.reindex(["a"], **{name: value})
def test_getitem_bool_mask_categorical_index(self):
    """Check boolean-mask selection built from CategoricalIndex comparisons.

    Equality masks are checked on both an ordered and an unordered index;
    inequality masks are checked on the ordered one and must raise
    ``TypeError`` on the unordered one.
    """
    def _frame(ordered):
        index = CategoricalIndex(
            [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=ordered), name="B"
        )
        return DataFrame({"A": np.arange(6, dtype="int64")}, index=index)

    df3 = _frame(True)
    df4 = _frame(False)

    # A value that is not in the index selects nothing.
    for frame in (df3, df4):
        tm.assert_frame_equal(frame[frame.index == "a"], frame.iloc[[]])

    for frame in (df3, df4):
        tm.assert_frame_equal(frame[frame.index == 1], frame.iloc[[0, 1, 3]])

    # since we have an ordered categorical
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=True,
    #                  name='B')
    tm.assert_frame_equal(df3[df3.index < 2], df3.iloc[[4]])
    tm.assert_frame_equal(df3[df3.index > 1], df3.iloc[[]])

    # unordered
    # cannot be compared
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=False,
    #                  name='B')
    msg = "Unordered Categoricals can only compare equality or not"
    with pytest.raises(TypeError, match=msg):
        df4[df4.index < 2]
    with pytest.raises(TypeError, match=msg):
        df4[df4.index > 1]
def test_reindexing(self):
    """Check ``DataFrame.reindex`` on the frame stored in ``self.df2``.

    Covers reindexing with plain lists and with ``Categorical`` targets,
    the error for a non-unique indexer, and the ``reindex`` keyword
    arguments that raise ``NotImplementedError``.
    """
    def _indexed(a_values, b_values):
        # Expected frame: column 'A' indexed by 'B'.
        return DataFrame({'A': a_values, 'B': b_values}).set_index('B')

    def _check(target, expected):
        result = self.df2.reindex(target)
        assert_frame_equal(result, expected, check_index_type=True)

    # reindexing
    # convert to a regular index
    # (reindex target, expected 'A' values, expected 'B' labels)
    plain_cases = [
        (['a', 'b', 'e'], [0, 1, 5, 2, 3, np.nan], list('aaabbe')),
        (['a', 'b'], [0, 1, 5, 2, 3], list('aaabb')),
        (['e'], [np.nan], ['e']),
        (['d'], [np.nan], ['d']),
    ]
    for target, a_values, labels in plain_cases:
        _check(target, _indexed(a_values, Series(labels)))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list('cabe')

    _check(
        Categorical(['a', 'd'], categories=cats),
        _indexed([0, 1, 5, np.nan], Series(list('aaad')).astype(CDT(cats))),
    )
    _check(
        Categorical(['a'], categories=cats),
        _indexed([0, 1, 5], Series(list('aaa')).astype(CDT(cats))),
    )

    # The first three list cases are checked again after the Categorical ones.
    for target, a_values, labels in plain_cases[:3]:
        _check(target, _indexed(a_values, Series(labels)))

    # give back the type of categorical that we received
    _check(
        Categorical(['a', 'd'], categories=cats, ordered=True),
        _indexed([0, 1, 5, np.nan],
                 Series(list('aaad')).astype(CDT(cats, ordered=True))),
    )
    _check(
        Categorical(['a', 'd'], categories=['a', 'd']),
        _indexed([0, 1, 5, np.nan],
                 Series(list('aaad')).astype(CDT(['a', 'd']))),
    )

    # passed duplicate indexers are not allowed
    msg = "cannot reindex with a non-unique indexer"
    with pytest.raises(ValueError, match=msg):
        self.df2.reindex(['a', 'a'])

    # args NotImplemented ATM
    msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
    unsupported = {'method': 'ffill', 'level': 1, 'limit': 2}
    for name, value in unsupported.items():
        with pytest.raises(NotImplementedError, match=msg.format(name)):
            self.df2.reindex(['a'], **{name: value})