def test_apply_reduce_Series(float_frame): float_frame["A"].iloc[::2] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected)
def test_ufunc_at(self): s = pd.Series([0, 1, 2], index=[1, 2, 3], name="x") np.add.at(s, [0, 2], 10) expected = pd.Series([10, 1, 12], index=[1, 2, 3], name="x") tm.assert_series_equal(s, expected)
def test_multiindex_assignment(self): # GH3777 part 2 # mixed dtype df = DataFrame( np.random.randint(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], ) df["d"] = np.nan arr = np.array([0.0, 1.0]) df.loc[4, "d"] = arr tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) # single dtype df = DataFrame( np.random.randint(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], ) df.loc[4, "c"] = arr exp = Series(arr, index=[8, 10], name="c", dtype="float64") tm.assert_series_equal(df.loc[4, "c"], exp) # scalar ok df.loc[4, "c"] = 10 exp = Series(10, index=[8, 10], name="c", dtype="float64") tm.assert_series_equal(df.loc[4, "c"], exp) # invalid assignments msg = ("cannot set using a multi-index selection indexer " "with a different length than the value") with pytest.raises(ValueError, match=msg): df.loc[4, "c"] = [0, 1, 2, 3] with pytest.raises(ValueError, match=msg): df.loc[4, "c"] = [0] # groupby example NUM_ROWS = 100 NUM_COLS = 10 col_names = [ "A" + num for num in map(str, np.arange(NUM_COLS).tolist()) ] index_cols = col_names[:5] df = DataFrame( np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), dtype=np.int64, columns=col_names, ) df = df.set_index(index_cols).sort_index() grp = df.groupby(level=index_cols[:4]) df["new_col"] = np.nan f_index = np.arange(5) def f(name, df2): return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex(f_index) # FIXME: dont leave commented-out # TODO(wesm): unused? # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T # we are actually operating on a copy here # but in this case, that's ok for name, df2 in grp: new_vals = np.arange(df2.shape[0]) df.loc[name, "new_col"] = new_vals
def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) # lists of positions should raise IndexError! msg = "positional indexers are out-of-bounds" with pytest.raises(IndexError, match=msg): df.iloc[:, [0, 1, 2, 3, 4, 5]] with pytest.raises(IndexError, match=msg): df.iloc[[1, 30]] with pytest.raises(IndexError, match=msg): df.iloc[[1, -30]] with pytest.raises(IndexError, match=msg): df.iloc[[100]] s = df["A"] with pytest.raises(IndexError, match=msg): s.iloc[[100]] with pytest.raises(IndexError, match=msg): s.iloc[[-100]] # still raise on a single indexer msg = "single positional indexer is out-of-bounds" with pytest.raises(IndexError, match=msg): df.iloc[30] with pytest.raises(IndexError, match=msg): df.iloc[-30] # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError with pytest.raises(IndexError, match=msg): s.iloc[30] with pytest.raises(IndexError, match=msg): s.iloc[-30] # slices are ok result = df.iloc[:, 4:10] # 0 < start < len < stop expected = df.iloc[:, 4:] tm.assert_frame_equal(result, expected) result = df.iloc[:, -4:-10] # stop < 0 < start < len expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) expected = df.iloc[:, :4:-1] tm.assert_frame_equal(result, expected) result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) expected = df.iloc[:, 4::-1] tm.assert_frame_equal(result, expected) result = df.iloc[:, -10:4] # start < 0 < stop < len expected = df.iloc[:, :4] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:4] # 0 < stop < len < start expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:11] # 0 < len < start < stop expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) # slice bounds exceeding is ok result = s.iloc[18:30] expected = s.iloc[18:] tm.assert_series_equal(result, expected) result = s.iloc[30:] expected = s.iloc[:0] tm.assert_series_equal(result, expected) result = s.iloc[30::-1] expected = s.iloc[::-1] tm.assert_series_equal(result, expected) # doc example def check(result, expected): str(result) result.dtypes tm.assert_frame_equal(result, expected) dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) msg = "positional indexers are out-of-bounds" with pytest.raises(IndexError, match=msg): dfl.iloc[[4, 5, 6]] msg = "single positional indexer is out-of-bounds" with pytest.raises(IndexError, match=msg): dfl.iloc[:, 4]
def test_iloc_setitem_td64_values_cast_na(self, value): # GH#18586 series = Series([0, 1, 2], dtype="timedelta64[ns]") series.iloc[0] = value expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(series, expected)
def test_store_multiindex(setup_path): # validate multi-index names # GH 5527 with ensure_clean_store(setup_path) as store: def make_index(names=None): return MultiIndex.from_tuples( [(datetime.datetime(2013, 12, d), s, t) for d in range(1, 3) for s in range(2) for t in range(3)], names=names, ) # no names _maybe_remove(store, "df") df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index()) store.append("df", df) tm.assert_frame_equal(store.select("df"), df) # partial names _maybe_remove(store, "df") df = DataFrame( np.zeros((12, 2)), columns=["a", "b"], index=make_index(["date", None, None]), ) store.append("df", df) tm.assert_frame_equal(store.select("df"), df) # series _maybe_remove(store, "s") s = Series(np.zeros(12), index=make_index(["date", None, None])) store.append("s", s) xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"])) tm.assert_series_equal(store.select("s"), xp) # dup with column _maybe_remove(store, "df") df = DataFrame( np.zeros((12, 2)), columns=["a", "b"], index=make_index(["date", "a", "t"]), ) msg = "duplicate names/columns in the multi-index when storing as a table" with pytest.raises(ValueError, match=msg): store.append("df", df) # dup within level _maybe_remove(store, "df") df = DataFrame( np.zeros((12, 2)), columns=["a", "b"], index=make_index(["date", "date", "date"]), ) with pytest.raises(ValueError, match=msg): store.append("df", df) # fully names _maybe_remove(store, "df") df = DataFrame( np.zeros((12, 2)), columns=["a", "b"], index=make_index(["date", "s", "t"]), ) store.append("df", df) tm.assert_frame_equal(store.select("df"), df)
def test_shift(self, datetime_series): shifted = datetime_series.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, datetime_series.index) tm.assert_index_equal(unshifted.index, datetime_series.index) tm.assert_numpy_array_equal( unshifted.dropna().values, datetime_series.values[:-1] ) offset = BDay() shifted = datetime_series.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) tm.assert_series_equal(unshifted, datetime_series) unshifted = datetime_series.shift(0, freq=offset) tm.assert_series_equal(unshifted, datetime_series) shifted = datetime_series.shift(1, freq="B") unshifted = shifted.shift(-1, freq="B") tm.assert_series_equal(unshifted, datetime_series) # corner case unshifted = datetime_series.shift(0) tm.assert_series_equal(unshifted, datetime_series) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, "B") shifted3 = ps.shift(1, BDay()) tm.assert_series_equal(shifted2, shifted3) tm.assert_series_equal(ps, shifted2.shift(-1, "B")) msg = "Given freq D does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.shift(freq="D") # legacy support shifted4 = ps.shift(1, freq="B") tm.assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) tm.assert_series_equal(shifted5, shifted4) # 32-bit taking # GH#8129 index = date_range("2000-01-01", periods=5) for dtype in ["int32", "int64"]: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) tm.assert_series_equal(result, expected) # GH#8260 # with tz s = Series( date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" ) result = s - s.shift() exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") tm.assert_series_equal(result, exp) # incompat tz s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo") msg = "DatetimeArray subtraction must have the same timezones or no timezones" with pytest.raises(TypeError, match=msg): s - s2
def _check(result, expected): if isinstance(result, Series): tm.assert_series_equal(result, expected) else: tm.assert_index_equal(result, expected)
def test_slice_replace(): values = Series(["short", "a bit longer", "evenlongerthanthat", "", np.nan]) exp = Series(["shrt", "a it longer", "evnlongerthanthat", "", np.nan]) result = values.str.slice_replace(2, 3) tm.assert_series_equal(result, exp) exp = Series(["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 3, "z") tm.assert_series_equal(result, exp) exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 2, "z") tm.assert_series_equal(result, exp) exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 1, "z") tm.assert_series_equal(result, exp) exp = Series(["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan]) result = values.str.slice_replace(-1, None, "z") tm.assert_series_equal(result, exp) exp = Series(["zrt", "zer", "zat", "z", np.nan]) result = values.str.slice_replace(None, -2, "z") tm.assert_series_equal(result, exp) exp = Series(["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan]) result = values.str.slice_replace(6, 8, "z") tm.assert_series_equal(result, exp) exp = Series(["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan]) result = values.str.slice_replace(-10, 3, "z") tm.assert_series_equal(result, exp)
def assert_series_or_index_equal(left, right): if isinstance(left, Series): tm.assert_series_equal(left, right) else: # Index tm.assert_index_equal(left, right)
def test_ismethods(): values = ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", " "] str_s = Series(values) alnum_e = [True, True, True, True, True, False, True, True, False, False] alpha_e = [True, True, True, False, False, False, True, False, False, False] digit_e = [False, False, False, True, False, False, False, True, False, False] # TODO: unused num_e = [ # noqa False, False, False, True, False, False, False, True, False, False, ] space_e = [False, False, False, False, False, False, False, False, False, True] lower_e = [False, True, False, False, False, False, False, False, False, False] upper_e = [True, False, False, False, True, False, True, False, False, False] title_e = [True, False, True, False, True, False, False, False, False, False] tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e)) tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e)) tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e)) tm.assert_series_equal(str_s.str.isspace(), Series(space_e)) tm.assert_series_equal(str_s.str.islower(), Series(lower_e)) tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values] assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values] assert str_s.str.isdigit().tolist() == [v.isdigit() for v in values] assert str_s.str.isspace().tolist() == [v.isspace() for v in values] assert str_s.str.islower().tolist() == [v.islower() for v in values] assert str_s.str.isupper().tolist() == [v.isupper() for v in values] assert str_s.str.istitle().tolist() == [v.istitle() for v in values]
def test_empty_str_methods(): empty_str = empty = Series(dtype=object) empty_int = Series(dtype="int64") empty_bool = Series(dtype=bool) empty_bytes = Series(dtype=object) # GH7241 # (extract) on empty series tm.assert_series_equal(empty_str, empty.str.cat(empty)) assert "" == empty.str.cat() tm.assert_series_equal(empty_str, empty.str.title()) tm.assert_series_equal(empty_int, empty.str.count("a")) tm.assert_series_equal(empty_bool, empty.str.contains("a")) tm.assert_series_equal(empty_bool, empty.str.startswith("a")) tm.assert_series_equal(empty_bool, empty.str.endswith("a")) tm.assert_series_equal(empty_str, empty.str.lower()) tm.assert_series_equal(empty_str, empty.str.upper()) tm.assert_series_equal(empty_str, empty.str.replace("a", "b")) tm.assert_series_equal(empty_str, empty.str.repeat(3)) tm.assert_series_equal(empty_bool, empty.str.match("^a")) tm.assert_frame_equal( DataFrame(columns=[0], dtype=str), empty.str.extract("()", expand=True) ) tm.assert_frame_equal( DataFrame(columns=[0, 1], dtype=str), empty.str.extract("()()", expand=True) ) tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False)) tm.assert_frame_equal( DataFrame(columns=[0, 1], dtype=str), empty.str.extract("()()", expand=False), ) tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_str.str.join("")) tm.assert_series_equal(empty_int, empty.str.len()) tm.assert_series_equal(empty_str, empty_str.str.findall("a")) tm.assert_series_equal(empty_int, empty.str.find("a")) tm.assert_series_equal(empty_int, empty.str.rfind("a")) tm.assert_series_equal(empty_str, empty.str.pad(42)) tm.assert_series_equal(empty_str, empty.str.center(42)) tm.assert_series_equal(empty_str, empty.str.split("a")) tm.assert_series_equal(empty_str, empty.str.rsplit("a")) tm.assert_series_equal(empty_str, empty.str.partition("a", expand=False)) tm.assert_series_equal(empty_str, empty.str.rpartition("a", expand=False)) tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) tm.assert_series_equal(empty_str, empty.str.slice(step=1)) tm.assert_series_equal(empty_str, empty.str.strip()) tm.assert_series_equal(empty_str, empty.str.lstrip()) tm.assert_series_equal(empty_str, empty.str.rstrip()) tm.assert_series_equal(empty_str, empty.str.wrap(42)) tm.assert_series_equal(empty_str, empty.str.get(0)) tm.assert_series_equal(empty_str, empty_bytes.str.decode("ascii")) tm.assert_series_equal(empty_bytes, empty.str.encode("ascii")) # ismethods should always return boolean (GH 29624) tm.assert_series_equal(empty_bool, empty.str.isalnum()) tm.assert_series_equal(empty_bool, empty.str.isalpha()) tm.assert_series_equal(empty_bool, empty.str.isdigit()) tm.assert_series_equal(empty_bool, empty.str.isspace()) tm.assert_series_equal(empty_bool, empty.str.islower()) tm.assert_series_equal(empty_bool, empty.str.isupper()) tm.assert_series_equal(empty_bool, empty.str.istitle()) tm.assert_series_equal(empty_bool, empty.str.isnumeric()) tm.assert_series_equal(empty_bool, empty.str.isdecimal()) tm.assert_series_equal(empty_str, empty.str.capitalize()) tm.assert_series_equal(empty_str, empty.str.swapcase()) tm.assert_series_equal(empty_str, empty.str.normalize("NFC")) table = str.maketrans("a", "b") tm.assert_series_equal(empty_str, empty.str.translate(table))
def test_apply_attach_name_axis1(float_frame): result = float_frame.apply(lambda x: x.name, axis=1) expected = Series(float_frame.index, index=float_frame.index) tm.assert_series_equal(result, expected)
def test_apply_attach_name(float_frame): result = float_frame.apply(lambda x: x.name) expected = Series(float_frame.columns, index=float_frame.columns) tm.assert_series_equal(result, expected)
def test_categorical_delegations(self): # invalid accessor msg = r"Can only use \.cat accessor with a 'category' dtype" with pytest.raises(AttributeError, match=msg): Series([1, 2, 3]).cat with pytest.raises(AttributeError, match=msg): Series([1, 2, 3]).cat() with pytest.raises(AttributeError, match=msg): Series(["a", "b", "c"]).cat with pytest.raises(AttributeError, match=msg): Series(np.arange(5.0)).cat with pytest.raises(AttributeError, match=msg): Series([Timestamp("20130101")]).cat # Series should delegate calls to '.categories', '.codes', '.ordered' # and the methods '.set_categories()' 'drop_unused_categories()' to the # categorical ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["a", "b", "c"]) tm.assert_index_equal(ser.cat.categories, exp_categories) ser.cat.categories = [1, 2, 3] exp_categories = Index([1, 2, 3]) tm.assert_index_equal(ser.cat.categories, exp_categories) exp_codes = Series([0, 1, 2, 0], dtype="int8") tm.assert_series_equal(ser.cat.codes, exp_codes) assert ser.cat.ordered ser = ser.cat.as_unordered() assert not ser.cat.ordered return_value = ser.cat.as_ordered(inplace=True) assert return_value is None assert ser.cat.ordered # reorder ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) ser = ser.cat.set_categories(["c", "b", "a"]) tm.assert_index_equal(ser.cat.categories, exp_categories) tm.assert_numpy_array_equal(ser.values.__array__(), exp_values) tm.assert_numpy_array_equal(ser.__array__(), exp_values) # remove unused categories ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"])) exp_categories = Index(["a", "b"]) exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) ser = ser.cat.remove_unused_categories() tm.assert_index_equal(ser.cat.categories, exp_categories) tm.assert_numpy_array_equal(ser.values.__array__(), exp_values) tm.assert_numpy_array_equal(ser.__array__(), exp_values) # This method is likely to be confused, so test that it raises an error # on wrong inputs: msg = "'Series' object has no attribute 'set_categories'" with pytest.raises(AttributeError, match=msg): ser.set_categories([4, 3, 2, 1]) # right: ser.cat.set_categories([4,3,2,1]) # GH#18862 (let Series.cat.rename_categories take callables) ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) result = ser.cat.rename_categories(lambda x: x.upper()) expected = Series( Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True) ) tm.assert_series_equal(result, expected)
def test_str_accessor_in_apply_func(): # https://github.com/pandas-dev/pandas/issues/38979 df = DataFrame(zip("abc", "def")) expected = Series(["A/D", "B/E", "C/F"]) result = df.apply(lambda f: "/".join(f.str.upper()), axis=1) tm.assert_series_equal(result, expected)
def test_grouper_creation_bug(self): # GH 8795 df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]}) g = df.groupby("A") expected = g.sum() g = df.groupby(pd.Grouper(key="A")) result = g.sum() tm.assert_frame_equal(result, expected) result = g.apply(lambda x: x.sum()) tm.assert_frame_equal(result, expected) g = df.groupby(pd.Grouper(key="A", axis=0)) result = g.sum() tm.assert_frame_equal(result, expected) # GH14334 # pd.Grouper(key=...) may be passed in a list df = DataFrame({ "A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6] }) # Group by single column expected = df.groupby("A").sum() g = df.groupby([pd.Grouper(key="A")]) result = g.sum() tm.assert_frame_equal(result, expected) # Group by two columns # using a combination of strings and Grouper objects expected = df.groupby(["A", "B"]).sum() # Group with two Grouper objects g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")]) result = g.sum() tm.assert_frame_equal(result, expected) # Group with a string and a Grouper object g = df.groupby(["A", pd.Grouper(key="B")]) result = g.sum() tm.assert_frame_equal(result, expected) # Group with a Grouper object and a string g = df.groupby([pd.Grouper(key="A"), "B"]) result = g.sum() tm.assert_frame_equal(result, expected) # GH8866 s = Series( np.arange(8, dtype="int64"), index=pd.MultiIndex.from_product( [list("ab"), range(2), date_range("20130101", periods=2)], names=["one", "two", "three"], ), ) result = s.groupby(pd.Grouper(level="three", freq="M")).sum() expected = Series([28], index=Index([Timestamp("2013-01-31")], freq="M", name="three")) tm.assert_series_equal(result, expected) # just specifying a level breaks result = s.groupby(pd.Grouper(level="one")).sum() expected = s.groupby(level="one").sum() tm.assert_series_equal(result, expected)
def test_getitem_setitem_datetimeindex(): N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") ts = Series(np.random.randn(N), index=rng) result = ts["1990-01-01 04:00:00"] expected = ts[4] assert result == expected result = ts.copy() result["1990-01-01 04:00:00"] = 0 result["1990-01-01 04:00:00"] = ts[4] tm.assert_series_equal(result, ts) result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts.copy() result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] tm.assert_series_equal(result, ts) lb = "1990-01-01 04:00:00" rb = "1990-01-01 07:00:00" # GH#18435 strings get a pass from tzawareness compat result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] tm.assert_series_equal(result, expected) lb = "1990-01-01 04:00:00-0500" rb = "1990-01-01 07:00:00-0500" result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] tm.assert_series_equal(result, expected) # But we do not give datetimes a pass on tzawareness compat # TODO: do the same with Timestamps and dt64 msg = "Cannot compare tz-naive and tz-aware datetime-like objects" naive = datetime(1990, 1, 1, 4) with tm.assert_produces_warning(FutureWarning): # GH#36148 will require tzawareness compat result = ts[naive] expected = ts[4] assert result == expected result = ts.copy() with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#36148 will require tzawareness compat result[datetime(1990, 1, 1, 4)] = 0 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#36148 will require tzawareness compat result[datetime(1990, 1, 1, 4)] = ts[4] tm.assert_series_equal(result, ts) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#36148 will require tzawareness compat result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts.copy() with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#36148 will require tzawareness compat result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#36148 will require tzawareness compat result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] tm.assert_series_equal(result, ts) lb = datetime(1990, 1, 1, 4) rb = datetime(1990, 1, 1, 7) msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime" with pytest.raises(TypeError, match=msg): # tznaive vs tzaware comparison is invalid # see GH#18376, GH#18162 ts[(ts.index >= lb) & (ts.index <= rb)] lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts[ts.index[4]] expected = ts[4] assert result == expected result = ts[ts.index[4:8]] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts.copy() result[ts.index[4:8]] = 0 result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts) # also test partial date slicing result = ts["1990-01-02"] expected = ts[24:48] tm.assert_series_equal(result, expected) result = ts.copy() result["1990-01-02"] = 0 result["1990-01-02"] = ts[24:48] tm.assert_series_equal(result, ts)
def test_tshift(self, datetime_series): # TODO: remove this test when tshift deprecation is enforced # PeriodIndex ps = tm.makePeriodSeries() shifted = ps.tshift(1) unshifted = shifted.tshift(-1) tm.assert_series_equal(unshifted, ps) shifted2 = ps.tshift(freq="B") tm.assert_series_equal(shifted, shifted2) shifted3 = ps.tshift(freq=BDay()) tm.assert_series_equal(shifted, shifted3) msg = "Given freq M does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.tshift(freq="M") # DatetimeIndex shifted = datetime_series.tshift(1) unshifted = shifted.tshift(-1) tm.assert_series_equal(datetime_series, unshifted) shifted2 = datetime_series.tshift(freq=datetime_series.index.freq) tm.assert_series_equal(shifted, shifted2) inferred_ts = Series( datetime_series.values, Index(np.asarray(datetime_series.index)), name="ts" ) shifted = inferred_ts.tshift(1) expected = datetime_series.tshift(1) expected.index = expected.index._with_freq(None) tm.assert_series_equal(shifted, expected) unshifted = shifted.tshift(-1) tm.assert_series_equal(unshifted, inferred_ts) no_freq = datetime_series[[0, 5, 7]] msg = "Freq was not set in the index hence cannot be inferred" with pytest.raises(ValueError, match=msg): no_freq.tshift()
def test_getitem_setitem_periodindex(): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") ts = Series(np.random.randn(N), index=rng) result = ts["1990-01-01 04"] expected = ts[4] assert result == expected result = ts.copy() result["1990-01-01 04"] = 0 result["1990-01-01 04"] = ts[4] tm.assert_series_equal(result, ts) result = ts["1990-01-01 04":"1990-01-01 07"] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts.copy() result["1990-01-01 04":"1990-01-01 07"] = 0 result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] tm.assert_series_equal(result, ts) lb = "1990-01-01 04" rb = "1990-01-01 07" result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] tm.assert_series_equal(result, expected) # GH 2782 result = ts[ts.index[4]] expected = ts[4] assert result == expected result = ts[ts.index[4:8]] expected = ts[4:8] tm.assert_series_equal(result, expected) result = ts.copy() result[ts.index[4:8]] = 0 result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts)
def test_shift_int(self, datetime_series): ts = datetime_series.astype(int) shifted = ts.shift(1) expected = ts.astype(float).shift(1) tm.assert_series_equal(shifted, expected)
def test_groupby_average_dup_values(dups): result = dups.groupby(level=0).mean() expected = dups.groupby(dups.index).mean() tm.assert_series_equal(result, expected)
def test_indexing_zerodim_np_array(self): # GH24919 df = DataFrame([[1, 2], [3, 4]]) result = df.iloc[np.array(0)] s = Series([1, 2], name=0) tm.assert_series_equal(result, s)
def compare(slobj): result = ts2[slobj].copy() result = result.sort_index() expected = ts[slobj] expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected)
def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() if isinstance(index_a, PeriodIndex): pytest.skip("Skip check for PeriodIndex") n = len(index_a) index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] msg = "Lengths must match|could not be broadcast" with pytest.raises(ValueError, match=msg): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) tm.assert_numpy_array_equal(index_a == index_a, expected1) tm.assert_numpy_array_equal(index_a == index_c, expected2) # test comparisons with numpy arrays array_a = np.array(index_a) array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) with pytest.raises(ValueError, match=msg): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) # test comparisons with Series series_a = Series(array_a) series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) with pytest.raises(ValueError, match=msg): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them with pytest.raises(ValueError, match="Lengths must match"): index_a == index_d with pytest.raises(ValueError, match="Lengths must match"): index_a == series_d with pytest.raises(ValueError, match="Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" with pytest.raises(ValueError, match=msg): series_a == series_d with pytest.raises(ValueError, match="Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding # MultiIndex because in this case each item in the index is a tuple of # length 2, and therefore is considered an array of length 2 in the # comparison instead of a scalar if not isinstance(index_a, MultiIndex): expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) # assuming the 2nd to last item is unique in the data item = index_a[-2] tm.assert_numpy_array_equal(index_a == item, expected3) tm.assert_series_equal(series_a == item, Series(expected3))
def test_indexing(): idx = date_range("2001-1-1", periods=20, freq="M") ts = Series(np.random.rand(len(idx)), index=idx) # getting # GH 3070, make sure semantics work on Series/Frame expected = ts["2001"] expected.name = "A" df = DataFrame({"A": ts}) with tm.assert_produces_warning(FutureWarning): # GH#36179 string indexing on rows for DataFrame deprecated result = df["2001"]["A"] tm.assert_series_equal(expected, result) # setting ts["2001"] = 1 expected = ts["2001"] expected.name = "A" df.loc["2001", "A"] = 1 with tm.assert_produces_warning(FutureWarning): # GH#36179 string indexing on rows for DataFrame deprecated result = df["2001"]["A"] tm.assert_series_equal(expected, result) # GH3546 (not including times on the last day) idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H") ts = Series(range(len(idx)), index=idx) expected = ts["2013-05"] tm.assert_series_equal(expected, ts) idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S") ts = Series(range(len(idx)), index=idx) expected = ts["2013-05"] tm.assert_series_equal(expected, ts) idx = [ Timestamp("2013-05-31 00:00"), Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)), ] ts = Series(range(len(idx)), index=idx) expected = ts["2013"] tm.assert_series_equal(expected, ts) # GH14826, indexing with a seconds resolution string / datetime object df = DataFrame( np.random.rand(5, 5), columns=["open", "high", "low", "close", "volume"], index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"), ) expected = df.loc[[df.index[2]]] # this is a single date, so will raise with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"): df["2012-01-02 18:01:02"] msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" with pytest.raises(KeyError, match=msg): df[df.index[2]]
def test_loc_getitem_iterator(string_series): idx = iter(string_series.index[:10]) result = string_series.loc[idx] tm.assert_series_equal(result, string_series[:10])
def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 s_dr = Series(date_range("1/1/2015", periods=5, tz="MET")) c_dr = s_dr.astype("category") s_pr = Series(period_range("1/1/2015", freq="D", periods=5)) c_pr = s_pr.astype("category") s_tdr = Series(timedelta_range("1 days", "10 days")) c_tdr = s_tdr.astype("category") # only testing field (like .day) # and bool (is_month_start) get_ops = lambda x: x._datetimelike_ops test_data = [ ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), ("Period", get_ops(PeriodArray), s_pr, c_pr), ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr), ] assert isinstance(c_dr.dt, Properties) special_func_defs = [ ("strftime", ("%Y-%m-%d",), {}), ("tz_convert", ("EST",), {}), ("round", ("D",), {}), ("floor", ("D",), {}), ("ceil", ("D",), {}), ("asfreq", ("D",), {}), # FIXME: don't leave commented-out # ('tz_localize', ("UTC",), {}), ] _special_func_names = [f[0] for f in special_func_defs] # the series is already localized _ignore_names = ["tz_localize", "components"] for name, attr_names, s, c in test_data: func_names = [ f for f in dir(s.dt) if not ( f.startswith("_") or f in attr_names or f in _special_func_names or f in _ignore_names ) ] func_defs = [(f, (), {}) for f in func_names] for f_def in special_func_defs: if f_def[0] in dir(s.dt): func_defs.append(f_def) for func, args, kwargs in func_defs: with warnings.catch_warnings(): if func == "to_period": # dropping TZ warnings.simplefilter("ignore", UserWarning) res = getattr(c.dt, func)(*args, **kwargs) exp = getattr(s.dt, func)(*args, **kwargs) tm.assert_equal(res, exp) for attr in attr_names: if attr in ["week", "weekofyear"]: # GH#33595 Deprecate week and weekofyear continue res = getattr(c.dt, attr) exp = getattr(s.dt, attr) if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) elif isinstance(res, Series): tm.assert_series_equal(res, exp) else: tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype("category") msg = "Can only use .dt accessor with datetimelike" with pytest.raises(AttributeError, match=msg): invalid.dt assert not hasattr(invalid, "str")
def test_groupby_rolling_center_center(self): # GH 35552 series = Series(range(1, 6)) result = series.groupby(series).rolling(center=True, window=3).mean() expected = Series( [np.nan] * 5, index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))), ) tm.assert_series_equal(result, expected) series = Series(range(1, 5)) result = series.groupby(series).rolling(center=True, window=3).mean() expected = Series( [np.nan] * 4, index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))), ) tm.assert_series_equal(result, expected) df = DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)}) result = df.groupby("a").rolling(center=True, window=3).mean() expected = DataFrame( [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan], index=MultiIndex.from_tuples( ( ("a", 0), ("a", 1), ("a", 2), ("a", 3), ("a", 4), ("b", 5), ("b", 6), ("b", 7), ("b", 8), ("b", 9), ("b", 10), ), names=["a", None], ), columns=["b"], ) tm.assert_frame_equal(result, expected) df = DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)}) result = df.groupby("a").rolling(center=True, window=3).mean() expected = DataFrame( [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan], index=MultiIndex.from_tuples( ( ("a", 0), ("a", 1), ("a", 2), ("a", 3), ("a", 4), ("b", 5), ("b", 6), ("b", 7), ("b", 8), ("b", 9), ), names=["a", None], ), columns=["b"], ) tm.assert_frame_equal(result, expected)
def test_apply_raw_float_frame_lambda(float_frame, axis): result = float_frame.apply(np.mean, axis=axis, raw=True) expected = float_frame.apply(lambda x: x.values.mean(), axis=axis) tm.assert_series_equal(result, expected)