def test_between_time(self, inclusive_endpoints_fixture, frame_or_series):
    # Row counts and endpoint handling of between_time for the ``inclusive``
    # mode supplied by the fixture: first a same-day window, then a window
    # whose start time is later in the day than its end time.
    inclusive = inclusive_endpoints_fixture
    keeps_start = inclusive in ["left", "both"]
    keeps_end = inclusive in ["right", "both"]
    drops_start = inclusive in ["right", "neither"]
    drops_end = inclusive in ["left", "neither"]

    rng = date_range("1/1/2000", "1/5/2000", freq="5min")
    ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
    ts = tm.get_obj(ts, frame_or_series)
    window_start, window_end = time(0, 0), time(1, 0)

    filtered = ts.between_time(window_start, window_end, inclusive=inclusive)

    expected_rows = 13 * 4 + 1
    if drops_start:
        expected_rows -= 5
    if drops_end:
        expected_rows -= 4
    assert len(filtered) == expected_rows

    for stamp in filtered.index:
        clock = stamp.time()
        assert (clock >= window_start) if keeps_start else (clock > window_start)
        assert (clock <= window_end) if keeps_end else (clock < window_end)

    # string bounds must select the same rows as datetime.time bounds
    result = ts.between_time("00:00", "01:00")
    expected = ts.between_time(window_start, window_end)
    tm.assert_equal(result, expected)

    # across midnight
    rng = date_range("1/1/2000", "1/5/2000", freq="5min")
    ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
    ts = tm.get_obj(ts, frame_or_series)
    window_start, window_end = time(22, 0), time(9, 0)

    filtered = ts.between_time(window_start, window_end, inclusive=inclusive)

    expected_rows = (12 * 11 + 1) * 4 + 1
    if drops_start:
        expected_rows -= 4
    if drops_end:
        expected_rows -= 4
    assert len(filtered) == expected_rows

    for stamp in filtered.index:
        clock = stamp.time()
        if keeps_start:
            assert (clock >= window_start) or (clock <= window_end)
        else:
            assert (clock > window_start) or (clock <= window_end)

        if keeps_end:
            assert (clock <= window_end) or (clock >= window_start)
        else:
            assert (clock < window_end) or (clock >= window_start)
def test_tz_localize(self, frame_or_series):
    # Localizing the object to UTC must give the same result as building it
    # on an index that was itself localized to UTC.
    rng = date_range("1/1/2011", periods=100, freq="H")

    obj = DataFrame({"a": 1}, index=rng)
    obj = tm.get_obj(obj, frame_or_series)

    result = obj.tz_localize("utc")

    expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
    expected = tm.get_obj(expected, frame_or_series)

    # Compare the timezone by its string form rather than ``.zone``: ``.zone``
    # is a pytz-only attribute and raises AttributeError when the index
    # carries ``datetime.timezone.utc`` or a ``zoneinfo`` object.
    assert str(result.index.tz) == "UTC"
    tm.assert_equal(result, expected)
def test_tz_convert(self, frame_or_series):
    # Converting the object to another timezone must give the same result as
    # building it on an index that was itself converted.
    rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")

    obj = DataFrame({"a": 1}, index=rng)
    obj = tm.get_obj(obj, frame_or_series)

    result = obj.tz_convert("Europe/Berlin")

    expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin"))
    expected = tm.get_obj(expected, frame_or_series)

    # Compare the timezone by its string form rather than ``.zone``: ``.zone``
    # is a pytz-only attribute and raises AttributeError when the index
    # carries a ``zoneinfo`` object.
    assert str(result.index.tz) == "Europe/Berlin"
    tm.assert_equal(result, expected)
def test_mask_pos_args_deprecation(self, frame_or_series):
    # https://github.com/pandas-dev/pandas/issues/41485
    obj = tm.get_obj(DataFrame({"a": range(5)}), frame_or_series)
    expected = tm.get_obj(DataFrame({"a": [-1, 1, -1, 3, -1]}), frame_or_series)

    is_even = obj % 2 == 0
    msg = (
        r"In a future version of pandas all arguments of "
        f"{frame_or_series.__name__}.mask except for "
        r"the arguments 'cond' and 'other' will be keyword-only"
    )

    # the third positional argument is what should trigger the warning
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = obj.mask(is_even, -1, False)

    tm.assert_equal(result, expected)
def test_tshift(self, datetime_frame, frame_or_series):
    # TODO(2.0): remove this test when tshift deprecation is enforced

    # PeriodIndex
    period_obj = tm.get_obj(tm.makePeriodFrame(), frame_or_series)
    moved = period_obj.tshift(1)
    moved_back = moved.tshift(-1)
    tm.assert_equal(moved_back, period_obj)

    moved_by_alias = period_obj.tshift(freq="B")
    tm.assert_equal(moved, moved_by_alias)

    moved_by_offset = period_obj.tshift(freq=offsets.BDay())
    tm.assert_equal(moved, moved_by_offset)

    msg = "Given freq M does not match PeriodIndex freq B"
    with pytest.raises(ValueError, match=msg):
        period_obj.tshift(freq="M")

    # DatetimeIndex
    dtobj = tm.get_obj(datetime_frame, frame_or_series)
    moved = dtobj.tshift(1)
    moved_back = moved.tshift(-1)
    tm.assert_equal(dtobj, moved_back)

    moved_by_own_freq = dtobj.tshift(freq=dtobj.index.freq)
    tm.assert_equal(moved, moved_by_own_freq)

    # Same data, but the index is rebuilt from a plain array of its values
    plain_index = Index(np.asarray(datetime_frame.index))
    inferred_ts = DataFrame(
        datetime_frame.values,
        plain_index,
        columns=datetime_frame.columns,
    )
    inferred_ts = tm.get_obj(inferred_ts, frame_or_series)
    moved = inferred_ts.tshift(1)

    expected = dtobj.tshift(1)
    expected.index = expected.index._with_freq(None)
    tm.assert_equal(moved, expected)

    moved_back = moved.tshift(-1)
    tm.assert_equal(moved_back, inferred_ts)

    # a non-contiguous selection of rows
    no_freq = dtobj.iloc[[0, 5, 7]]
    msg = "Freq was not set in the index hence cannot be inferred"
    with pytest.raises(ValueError, match=msg):
        no_freq.tshift()
def test_slice_shift_deprecated(self, frame_or_series):
    # GH 37601
    frame = DataFrame({"A": [1, 2, 3, 4]})
    obj = tm.get_obj(frame, frame_or_series)

    # calling slice_shift at all must emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        obj.slice_shift()
def test_maybe_cast_slice_bound(self, make_range, frame_or_series):
    idx = make_range(start="2013/10/01", freq="D", periods=10)

    obj = DataFrame({"units": list(range(100, 110))}, index=idx)
    obj = tm.get_obj(obj, frame_or_series)

    msg = (
        f"cannot do slice indexing on {type(idx).__name__} with "
        r"these indexers \[foo\] of type str"
    )

    # Check the lower-level calls are raising where expected.
    for bound_method in (idx._maybe_cast_slice_bound, idx.get_slice_bound):
        with pytest.raises(TypeError, match=msg):
            bound_method("foo", "left")

    # The same error must surface through plain __getitem__ and through .loc,
    # with the invalid label on either side of the slice.
    bad_slices = (slice("2013/09/30", "foo"), slice("foo", "2013/09/30"))
    for indexable in (obj, obj.loc):
        for bad in bad_slices:
            with pytest.raises(TypeError, match=msg):
                indexable[bad]
def test_pipe_tuple(self, frame_or_series):
    obj = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)

    def return_y(x, y):
        return y

    # (callable, "y") asks pipe to pass the object as keyword ``y``, so the
    # positional 0 lands in ``x`` and the object itself comes back.
    result = obj.pipe((return_y, "y"), 0)
    tm.assert_equal(result, obj)
def test_at_time_nonexistent(self, frame_or_series):
    # time doesn't exist
    rng = date_range("1/1/2012", freq="23Min", periods=384)
    frame = DataFrame(np.random.randn(len(rng)), rng)
    ts = tm.get_obj(frame, frame_or_series)

    selected = ts.at_time("16:00")
    assert len(selected) == 0
def test_transform_bad_dtype(op, frame_or_series, request):
    # GH 35964
    # Ops marked as expected failures (raising ValueError), with the reason.
    xfail_reasons = (
        ("rank", "GH 40418: rank does not raise a TypeError"),
        ("ngroup", "ngroup not valid for NDFrame"),
    )
    for kernel, reason in xfail_reasons:
        if op == kernel:
            request.node.add_marker(
                pytest.mark.xfail(raises=ValueError, reason=reason)
            )
            break

    # DataFrame that will fail on most transforms
    obj = tm.get_obj(DataFrame({"A": 3 * [object]}), frame_or_series)

    # tshift is deprecated
    if op != "tshift":
        warn = None
    else:
        warn = FutureWarning

    with tm.assert_produces_warning(warn):
        with pytest.raises(TypeError, match="unsupported operand|not supported"):
            obj.transform(op)

        # list-like and dict-like forms are checked against a different message
        for wrapped in ([op], {"A": op}, {"A": [op]}):
            with pytest.raises(TypeError, match="Transform function failed"):
                obj.transform(wrapped)
def test_datetime_frame_shift_with_freq_error(
    self, datetime_frame, frame_or_series
):
    dtobj = tm.get_obj(datetime_frame, frame_or_series)

    # a non-contiguous selection of rows
    no_freq = dtobj.iloc[[0, 5, 7]]

    with pytest.raises(
        ValueError, match="Freq was not set in the index hence cannot be inferred"
    ):
        no_freq.shift(freq="infer")
def test_repr_unicode_level_names(self, frame_or_series):
    # one of the level names is a non-ASCII character
    level_names = ["\u0394", "i1"]
    index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=level_names)

    frame = DataFrame(np.random.randn(2, 4), index=index)
    obj = tm.get_obj(frame, frame_or_series)

    # only checks that building the repr does not raise
    repr(obj)
def test_shift_with_periodindex(self, frame_or_series):
    # Shifting with PeriodIndex
    ps = tm.get_obj(tm.makePeriodFrame(), frame_or_series)

    shifted = ps.shift(1)
    unshifted = shifted.shift(-1)
    tm.assert_index_equal(shifted.index, ps.index)
    tm.assert_index_equal(unshifted.index, ps.index)

    # after the round trip, the surviving values equal all but the last original
    if frame_or_series is DataFrame:
        round_trip_values = unshifted.iloc[:, 0].dropna().values
        original_values = ps.iloc[:-1, 0].values
    else:
        round_trip_values = unshifted.dropna().values
        original_values = ps.values[:-1]
    tm.assert_numpy_array_equal(round_trip_values, original_values)

    by_alias = ps.shift(1, "B")
    by_offset = ps.shift(1, offsets.BDay())
    tm.assert_equal(by_alias, by_offset)
    tm.assert_equal(ps, by_alias.shift(-1, "B"))

    msg = "does not match PeriodIndex freq"
    with pytest.raises(ValueError, match=msg):
        ps.shift(freq="D")

    # legacy support
    by_alias_kw = ps.shift(1, freq="B")
    tm.assert_equal(by_alias, by_alias_kw)

    by_offset_kw = ps.shift(1, freq=offsets.BDay())
    tm.assert_equal(by_offset_kw, by_alias_kw)
def test_at_time_raises(self, frame_or_series):
    # GH#20725
    frame = DataFrame([[1, 2, 3], [4, 5, 6]])
    obj = tm.get_obj(frame, frame_or_series)

    # index is not a DatetimeIndex
    with pytest.raises(TypeError, match="Index must be DatetimeIndex"):
        obj.at_time("00:00")
def test_at_time_midnight(self, frame_or_series):
    # midnight, everything
    rng = date_range("1/1/2000", "1/31/2000")
    frame = DataFrame(np.random.randn(len(rng), 3), index=rng)
    ts = tm.get_obj(frame, frame_or_series)

    midnight = time(0, 0)
    tm.assert_equal(ts.at_time(midnight), ts)
def test_iloc_getitem_invalid_scalar(self, frame_or_series):
    # GH 21982
    grid = np.arange(100).reshape(10, 10)
    obj = tm.get_obj(DataFrame(grid), frame_or_series)

    # a string is rejected as a positional indexer
    msg = "Cannot index by location index"
    with pytest.raises(TypeError, match=msg):
        obj.iloc["a"]
def test_between_time_types(self, frame_or_series):
    # GH11818
    rng = date_range("1/1/2000", "1/5/2000", freq="5min")
    obj = tm.get_obj(DataFrame({"A": 0}, index=rng), frame_or_series)

    # full datetime objects are passed where between_time takes times
    lower = datetime(2010, 1, 2, 1)
    upper = datetime(2010, 1, 2, 5)

    msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
    with pytest.raises(ValueError, match=msg):
        obj.between_time(lower, upper)
def test_truncate_multiindex(self, frame_or_series):
    # GH 34564
    mi = pd.MultiIndex.from_product(
        [[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"]
    )
    source = DataFrame(range(mi.shape[0]), index=mi, columns=["col"])
    source = tm.get_obj(source, frame_or_series)

    result = source.truncate(before=2, after=3)

    # expected rows are those whose first-level label is 2 or 3
    flat = DataFrame.from_dict(
        {
            "L1": [2, 2, 3, 3],
            "L2": ["A", "B", "A", "B"],
            "col": [2, 3, 4, 5],
        }
    )
    expected = tm.get_obj(flat.set_index(["L1", "L2"]), frame_or_series)

    tm.assert_equal(result, expected)
def test_where_try_cast_deprecated(frame_or_series):
    frame = DataFrame(np.random.randn(4, 3))
    obj = tm.get_obj(frame, frame_or_series)
    positive = obj > 0

    with tm.assert_produces_warning(FutureWarning):
        # try_cast keyword deprecated
        obj.where(positive, -1, try_cast=False)
def test_asfreq_keep_index_name(self, frame_or_series):
    # GH#9854
    index_name = "bar"
    index = date_range("20130101", periods=20, name=index_name)

    frame = DataFrame(list(range(20)), columns=["foo"], index=index)
    obj = tm.get_obj(frame, frame_or_series)

    # the name is present before asfreq and must still be there afterwards
    assert index_name == obj.index.name
    resampled = obj.asfreq("10D")
    assert index_name == resampled.index.name
def test_pipe_tuple_error(self, frame_or_series):
    obj = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)

    def return_y(x, y):
        return y

    # "y" is named as the pipe target and is also passed as a keyword
    with pytest.raises(
        ValueError, match="y is both the pipe target and a keyword argument"
    ):
        obj.pipe((return_y, "y"), x=1, y=0)
def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series):
    obj = tm.get_obj(multiindex_dataframe_random_data, frame_or_series)

    # split after the fifth row and append the pieces back together
    head, tail = obj[:5], obj[5:]
    rejoined = head.append(tail)

    tm.assert_equal(rejoined, obj)
def test_agg_multiple_levels(
    self, multiindex_year_month_day_dataframe_random_data, frame_or_series
):
    ymd = tm.get_obj(
        multiindex_year_month_day_dataframe_random_data, frame_or_series
    )

    # sum(level=...) must warn and match the groupby-based result
    with tm.assert_produces_warning(FutureWarning):
        result = ymd.sum(level=["year", "month"])

    grouped = ymd.groupby(level=["year", "month"])
    expected = grouped.sum()
    tm.assert_equal(result, expected)
def test_truncate_nonsortedindex(self, frame_or_series):
    # GH#17935
    unsorted_labels = [5, 3, 2, 9, 0]
    frame = DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=unsorted_labels)
    obj = tm.get_obj(frame, frame_or_series)

    with pytest.raises(ValueError, match="truncate requires a sorted index"):
        obj.truncate(before=3, after=9)
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    # GH 35964
    op = op_wrapper(all_reductions)

    frame = DataFrame({"A": [1, 2, 3]})
    obj = tm.get_obj(frame, frame_or_series)

    with pytest.raises(ValueError, match="Function did not transform"):
        obj.transform(op)
def test_first_subset(self, frame_or_series):
    # 12-hourly data: a 10-day offset is asserted to select 20 rows
    half_daily = tm.get_obj(tm.makeTimeDataFrame(freq="12h"), frame_or_series)
    assert len(half_daily.first("10d")) == 20

    # daily data
    ts = tm.get_obj(tm.makeTimeDataFrame(freq="D"), frame_or_series)
    assert len(ts.first("10d")) == 10

    three_months = ts.first("3M")
    tm.assert_equal(three_months, ts[:"3/31/2000"])

    three_weeks = ts.first("21D")
    tm.assert_equal(three_weeks, ts[:21])

    # an empty selection comes back unchanged
    from_empty = ts[:0].first("3M")
    tm.assert_equal(from_empty, ts[:0])
def test_last_subset(self, frame_or_series):
    # 12-hourly data: a 10-day offset is asserted to select 20 rows
    half_daily = tm.get_obj(tm.makeTimeDataFrame(freq="12h"), frame_or_series)
    assert len(half_daily.last("10d")) == 20

    # daily data, 30 periods
    ts = tm.get_obj(tm.makeTimeDataFrame(nper=30, freq="D"), frame_or_series)
    assert len(ts.last("10d")) == 10

    # the same selection expressed once by label and once by position
    by_label = ts.last("21D")
    tm.assert_equal(by_label, ts["2000-01-10":])

    by_position = ts.last("21D")
    tm.assert_equal(by_position, ts[-21:])

    # an empty selection comes back unchanged
    from_empty = ts[:0].last("3M")
    tm.assert_equal(from_empty, ts[:0])
def test_where_mask_deprecated(frame_or_series):
    # GH 47728
    frame = DataFrame(np.random.randn(4, 3))
    obj = tm.get_obj(frame, frame_or_series)
    positive = obj > 0

    # passing ``errors`` must warn for where ...
    with tm.assert_produces_warning(FutureWarning):
        obj.where(positive, -1, errors="raise")

    # ... and for mask
    with tm.assert_produces_warning(FutureWarning):
        obj.mask(positive, -1, errors="raise")
def test_at_time(self, frame_or_series):
    rng = date_range("1/1/2000", "1/5/2000", freq="5min")
    frame = DataFrame(np.random.randn(len(rng), 2), index=rng)
    ts = tm.get_obj(frame, frame_or_series)

    # every selected row shares hour, minute and second with the probe stamp
    probe = rng[1]
    rs = ts.at_time(probe)
    assert (rs.index.hour == probe.hour).all()
    assert (rs.index.minute == probe.minute).all()
    assert (rs.index.second == probe.second).all()

    # a string must select the same rows as the equivalent datetime.time
    from_string = ts.at_time("9:30")
    from_time = ts.at_time(time(9, 30))
    tm.assert_equal(from_string, from_time)
def test_reorder_levels(self, frame_or_series):
    def build(idx):
        # Same two-column payload every time; only the index differs.
        frame = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=idx)
        return tm.get_obj(frame, frame_or_series)

    index = MultiIndex(
        levels=[["bar"], ["one", "two", "three"], [0, 1]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        names=["L0", "L1", "L2"],
    )
    obj = build(index)

    # no change, position
    same_by_position = obj.reorder_levels([0, 1, 2])
    tm.assert_equal(obj, same_by_position)

    # no change, labels
    same_by_label = obj.reorder_levels(["L0", "L1", "L2"])
    tm.assert_equal(obj, same_by_label)

    # rotate, position
    rotated = obj.reorder_levels([1, 2, 0])
    rotated_index = MultiIndex(
        levels=[["one", "two", "three"], [0, 1], ["bar"]],
        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]],
        names=["L1", "L2", "L0"],
    )
    tm.assert_equal(rotated, build(rotated_index))

    # the same level requested three times, by position
    repeated = obj.reorder_levels([0, 0, 0])
    repeated_index = MultiIndex(
        levels=[["bar"], ["bar"], ["bar"]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]],
        names=["L0", "L0", "L0"],
    )
    expected = build(repeated_index)
    tm.assert_equal(repeated, expected)

    # ... and by label
    repeated = obj.reorder_levels(["L0", "L0", "L0"])
    tm.assert_equal(repeated, expected)