def _create_mi_with_dt64tz_level():
    """
    MultiIndex with a level that is a tzaware DatetimeIndex.
    """
    # GH#8367 round trip with pickle
    return MultiIndex.from_product(
        [[1, 2], ["a", "b"], pd.date_range("20130101", periods=3, tz="US/Eastern")],
        names=["one", "two", "three"],
    )


indices_dict = {
    "unicode": tm.makeUnicodeIndex(100),
    "string": tm.makeStringIndex(100),
    "datetime": tm.makeDateIndex(100),
    "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
    "period": tm.makePeriodIndex(100),
    "timedelta": tm.makeTimedeltaIndex(100),
    "int": tm.makeIntIndex(100),
    "uint": tm.makeUIntIndex(100),
    "range": tm.makeRangeIndex(100),
    "float": tm.makeFloatIndex(100),
    "bool": tm.makeBoolIndex(10),
    "categorical": tm.makeCategoricalIndex(100),
    "interval": tm.makeIntervalIndex(100),
    "empty": Index([]),
    "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
    "multi": _create_multiindex(),
    "repeats": Index([0, 0, 1, 1, 2, 2]),
}
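# A minimal sketch (not part of the original conftest fragment) of how a dict
# like indices_dict is typically consumed: a parametrized pytest fixture that
# hands each test a fresh copy, so tests that set .name or otherwise mutate
# the index cannot leak state. The fixture name "index_fixture" is
# illustrative, not taken from this file.
import pytest


@pytest.fixture(params=indices_dict.keys())
def index_fixture(request):
    # copy so per-test mutation does not affect the shared objects above
    return indices_dict[request.param].copy()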
class TestSeriesMisc: def test_scalarop_preserve_name(self, datetime_series): result = datetime_series * 2 assert result.name == datetime_series.name def test_copy_name(self, datetime_series): result = datetime_series.copy() assert result.name == datetime_series.name def test_copy_index_name_checking(self, datetime_series): # don't want to be able to modify the index stored elsewhere after # making a copy datetime_series.index.name = None assert datetime_series.index.name is None assert datetime_series is datetime_series cp = datetime_series.copy() cp.index.name = "foo" printing.pprint_thing(datetime_series.index.name) assert datetime_series.index.name is None def test_append_preserve_name(self, datetime_series): result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_binop_maybe_preserve_name(self, datetime_series): # names match, preserve result = datetime_series * datetime_series assert result.name == datetime_series.name result = datetime_series.mul(datetime_series) assert result.name == datetime_series.name result = datetime_series * datetime_series[:-2] assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "something else" result = datetime_series + cp assert result.name is None result = datetime_series.add(cp) assert result.name is None ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"] ops = ops + ["r" + op for op in ops] for op in ops: # names match, preserve s = datetime_series.copy() result = getattr(s, op)(s) assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "changed" result = getattr(s, op)(cp) assert result.name is None def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name def test_getitem_preserve_name(self, datetime_series): result = datetime_series[datetime_series > 0] assert result.name == datetime_series.name result = datetime_series[[0, 2, 4]] assert result.name == datetime_series.name result = datetime_series[5:10] assert result.name == datetime_series.name def test_pickle_datetimes(self, datetime_series): unp_ts = self._pickle_roundtrip(datetime_series) tm.assert_series_equal(unp_ts, datetime_series) def test_pickle_strings(self, string_series): unp_series = self._pickle_roundtrip(string_series) tm.assert_series_equal(unp_series, string_series) def _pickle_roundtrip(self, obj): with tm.ensure_clean() as path: obj.to_pickle(path) unpickled = pd.read_pickle(path) return unpickled def test_sort_index_name(self, datetime_series): result = datetime_series.sort_index(ascending=False) assert result.name == datetime_series.name def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} result = Series(d) expected = Series(d, index=sorted(d.keys())) tm.assert_series_equal(result, expected) result = Series(d, index=["b", "c", "d", "a"]) expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) tm.assert_series_equal(result, expected) def test_constructor_subclass_dict(self, dict_subclass): data = dict_subclass((x, 10.0 * x) for x in range(10)) series = Series(data) expected = Series(dict(data.items())) tm.assert_series_equal(series, expected) def test_constructor_ordereddict(self): # GH3283 data = OrderedDict( ("col{i}".format(i=i), np.random.random()) for i in range(12)) series = Series(data) expected = Series(list(data.values()), list(data.keys())) 
tm.assert_series_equal(series, expected) # Test with subclass class A(OrderedDict): pass series = Series(A(data)) tm.assert_series_equal(series, expected) def test_constructor_dict_multiindex(self): d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} _d = sorted(d.items()) result = Series(d) expected = Series([x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d])) tm.assert_series_equal(result, expected) d["z"] = 111.0 _d.insert(0, ("z", d["z"])) result = Series(d) expected = Series([x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False)) result = result.reindex(index=expected.index) tm.assert_series_equal(result, expected) def test_constructor_dict_timedelta_index(self): # GH #12169 : Resample category data with timedelta index # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data expected = Series(data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")) result = Series( data={ pd.to_timedelta(0, unit="s"): "A", pd.to_timedelta(10, unit="s"): "B", pd.to_timedelta(20, unit="s"): "C", }, index=pd.to_timedelta([0, 10, 20], unit="s"), ) tm.assert_series_equal(result, expected) def test_sparse_accessor_updates_on_inplace(self): s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") s.drop([0, 1], inplace=True) assert s.sparse.density == 1.0 def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(["a{}".format(i) for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. 
s = pd.Series(index=index, dtype=object) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance( x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series(dtype=object) s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) def test_iter_datetimes(self, datetime_series): for i, val in enumerate(datetime_series): assert val == datetime_series[i] def test_iter_strings(self, string_series): for i, val in enumerate(string_series): assert val == string_series[i] def test_keys(self, datetime_series): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = datetime_series.keys assert getkeys() is datetime_series.index def test_values(self, datetime_series): tm.assert_almost_equal(datetime_series.values, datetime_series, check_dtype=False) def test_iteritems_datetimes(self, datetime_series): for idx, val in datetime_series.iteritems(): assert val == datetime_series[idx] def test_iteritems_strings(self, string_series): for idx, val in string_series.iteritems(): assert val == string_series[idx] # assert is lazy (generators don't define reverse, lists do) assert not hasattr(string_series.iteritems(), "reverse") def test_items_datetimes(self, datetime_series): for idx, val in datetime_series.items(): assert val == datetime_series[idx] def test_items_strings(self, string_series): for idx, val in string_series.items(): assert val == string_series[idx] # assert is lazy (generators don't define reverse, lists do) assert not hasattr(string_series.items(), "reverse") def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype="float64") # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) def test_copy_tzaware(self): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) for deep in [None, False, True]: s = Series([Timestamp("2012/01/01", tz="UTC")]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp("1999/01/01", tz="UTC") # default deep is True if deep is None or deep is True: # Did not modify original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected) else: # we DID modify the original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self, datetime_series): # it works! 
np.unique(datetime_series) def test_item(self): s = Series([1]) result = s.item() assert result == 1 assert result == s.iloc[0] assert isinstance(result, int) # i.e. not np.int64 ser = Series([0.5], index=[3]) result = ser.item() assert isinstance(result, float) assert result == 0.5 ser = Series([1, 2]) msg = "can only convert an array of size 1" with pytest.raises(ValueError, match=msg): ser.item() dti = pd.date_range("2016-01-01", periods=2) with pytest.raises(ValueError, match=msg): dti.item() with pytest.raises(ValueError, match=msg): Series(dti).item() val = dti[:1].item() assert isinstance(val, Timestamp) val = Series(dti)[:1].item() assert isinstance(val, Timestamp) tdi = dti - dti with pytest.raises(ValueError, match=msg): tdi.item() with pytest.raises(ValueError, match=msg): Series(tdi).item() val = tdi[:1].item() assert isinstance(val, Timedelta) val = Series(tdi)[:1].item() assert isinstance(val, Timedelta) # Case where ser[0] would not work ser = Series(dti, index=[5, 6]) val = ser[:1].item() assert val == dti[0] def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) def test_str_accessor_updates_on_inplace(self): s = pd.Series(list("abc")) s.drop([0], inplace=True) assert len(s.str.lower()) == 2 def test_str_attribute(self): # GH9068 methods = ["strip", "rstrip", "lstrip"] s = Series([" jack", "jill ", " jesse ", "frank"]) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) tm.assert_series_equal( getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with pytest.raises(AttributeError, match="only use .str accessor"): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series(dtype=object) assert s_empty.empty s2 = pd.Series(index=[1], dtype=object) for full_series in [pd.Series([1]), s2]: assert not full_series.empty @async_mark() async def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" await ip.run_code(code) # TODO: remove it when Ipython updates # GH 33567, jedi version raises Deprecation warning in Ipython import jedi if jedi.__version__ < "0.17.0": warning = tm.assert_produces_warning(None) else: warning = tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False) with warning: with provisionalcompleter("ignore"): list(ip.Completer.completions("s.", 1)) def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_attrs(self): s = pd.Series([0, 1], name="abc") assert s.attrs == {} s.attrs["version"] = 1 result = s + 1 assert result.attrs == {"version": 1}
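# A minimal sketch relating the _pickle_roundtrip helper defined earlier in
# this class to the pandas testing utility used by other suites in this
# collection (tm.round_trip_pickle): both write the object with to_pickle and
# read it back. Illustrative only; the "pandas._testing" import path is an
# assumption for recent pandas versions.
def _example_pickle_roundtrip():
    import pandas as pd
    import pandas._testing as tm

    ser = pd.Series([1.0, 2.0, 3.0], name="x")
    result = tm.round_trip_pickle(ser)
    tm.assert_series_equal(result, ser)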
class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex @pytest.fixture( params=[ tm.makePeriodIndex(10), period_range("20130101", periods=10, freq="D")[::-1], ], ids=["index_inc", "index_dec"], ) def indices(self, request): return request.param def create_index(self) -> PeriodIndex: return period_range("20130101", periods=5, freq="D") def test_pickle_compat_construction(self): pass @pytest.mark.parametrize("freq", ["D", "M", "A"]) def test_pickle_round_trip(self, freq): idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) def test_where(self): # This is handled in test_indexing pass @pytest.mark.parametrize("use_numpy", [True, False]) @pytest.mark.parametrize( "index", [ period_range("2000-01-01", periods=3, freq="D"), period_range("2001-01-01", periods=3, freq="2D"), PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), ], ) def test_repeat_freqstr(self, index, use_numpy): # GH10183 expected = PeriodIndex([p for p in index for _ in range(3)]) result = np.repeat(index, 3) if use_numpy else index.repeat(3) tm.assert_index_equal(result, expected) assert result.freqstr == index.freqstr def test_fillna_period(self): # GH 11343 idx = PeriodIndex(["2011-01-01 09:00", NaT, "2011-01-01 11:00"], freq="H") exp = PeriodIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H") tm.assert_index_equal(idx.fillna(Period("2011-01-01 10:00", freq="H")), exp) exp = Index( [ Period("2011-01-01 09:00", freq="H"), "x", Period("2011-01-01 11:00", freq="H"), ], dtype=object, ) tm.assert_index_equal(idx.fillna("x"), exp) exp = Index( [ Period("2011-01-01 09:00", freq="H"), Period("2011-01-01", freq="D"), Period("2011-01-01 11:00", freq="H"), ], dtype=object, ) tm.assert_index_equal(idx.fillna(Period("2011-01-01", freq="D")), exp) def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex.millisecond msg = "'DatetimeIndex' object has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex([]).millisecond def test_make_time_series(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") series = Series(1, index=index) assert isinstance(series, Series) def test_shallow_copy_empty(self): # GH13067 idx = PeriodIndex([], freq="M") result = idx._shallow_copy() expected = idx tm.assert_index_equal(result, expected) def test_shallow_copy_disallow_i8(self): # GH-24391 pi = period_range("2018-01-01", periods=3, freq="2D") with pytest.raises(AssertionError, match="ndarray"): pi._shallow_copy(pi.asi8) def test_shallow_copy_requires_disallow_period_index(self): pi = period_range("2018-01-01", periods=3, freq="2D") with pytest.raises(AssertionError, match="PeriodIndex"): pi._shallow_copy(pi) def test_view_asi8(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = PeriodIndex(["2011-01-01", NaT], freq="D") tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) def test_values(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=np.object) 
tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01-01", NaT], freq="D") exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) def test_period_index_length(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") assert len(pi) == 4 * 9 pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") assert len(pi) == 12 * 9 start = Period("02-Apr-2005", "B") i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period("2006-12-31", "1w") i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period("2006-12-31", ("w", 1)) i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq msg = "start and end must have same freq" with pytest.raises(ValueError, match=msg): period_range(start=start, end=end_intv) end_intv = Period("2005-05-01", "B") i1 = period_range(start=start, end=end_intv) msg = ("Of the three parameters: start, end, and periods, exactly two " "must be specified") with pytest.raises(ValueError, match=msg): period_range(start=start) # infer freq from first element i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) assert len(i2) == 2 assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) assert len(i2) == 2 assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) vals = np.array(vals) with pytest.raises(ValueError, match=msg): PeriodIndex(vals) def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") self._check_all_fields(pi) pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") self._check_all_fields(pi) pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") self._check_all_fields(pi) pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") self._check_all_fields(pi) pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") self._check_all_fields(pi) pi = period_range(freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00") self._check_all_fields(pi) end_intv = Period("2006-12-31", "W") i1 = 
period_range(end=end_intv, periods=10) self._check_all_fields(i1) def _check_all_fields(self, periodindex): fields = [ "year", "month", "day", "hour", "minute", "second", "weekofyear", "week", "dayofweek", "dayofyear", "quarter", "qyear", "days_in_month", ] periods = list(periodindex) s = pd.Series(periodindex) for field in fields: field_idx = getattr(periodindex, field) assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): assert getattr(x, field) == val def test_period_set_index_reindex(self): # GH 6631 df = DataFrame(np.random.random(6)) idx1 = period_range("2011/01/01", periods=6, freq="M") idx2 = period_range("2013", periods=6, freq="A") df = df.set_index(idx1) tm.assert_index_equal(df.index, idx1) df = df.set_index(idx2) tm.assert_index_equal(df.index, idx2) @pytest.mark.parametrize( "p_values, o_values, values, expected_values", [ ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"], [1.0, 1.0], [1.0, 1.0, np.nan], ), ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [1.0, 1.0], [1.0, 1.0], ), ], ) def test_period_reindex_with_object(self, p_values, o_values, values, expected_values): # GH 28337 period_index = PeriodIndex(p_values) object_index = Index(o_values) s = pd.Series(values, index=period_index) result = s.reindex(object_index) expected = pd.Series(expected_values, index=object_index) tm.assert_series_equal(result, expected) def test_factorize(self): idx1 = PeriodIndex( ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M") exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) idx2 = PeriodIndex( ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M") exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) arr, idx = idx2.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M") arr, idx = idx2.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) def test_is_(self): create_index = lambda: period_range( freq="A", start="1/1/2001", end="12/1/2009") index = create_index() assert index.is_(index) assert not index.is_(create_index()) assert index.is_(index.view()) assert index.is_(index.view().view().view().view().view()) assert index.view().is_(index) ind2 = index.view() index.name = "Apple" assert ind2.is_(index) assert not index.is_(index[:]) assert not index.is_(index.asfreq("M")) assert not index.is_(index.asfreq("A")) assert not index.is_(index - 2) assert not index.is_(index - 0) def test_contains(self): rng = period_range("2007-01", freq="M", periods=10) assert Period("2007-01", freq="M") in rng assert not Period("2007-01", freq="D") in rng assert not Period("2007-01", freq="2M") in rng def test_contains_nat(self): # see gh-13582 idx = period_range("2007-01", freq="M", periods=10) assert NaT not in idx assert None not in idx assert 
float("nan") not in idx assert np.nan not in idx idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") assert NaT in idx assert None in idx assert float("nan") in idx assert np.nan in idx def test_periods_number_check(self): msg = ("Of the three parameters: start, end, and periods, exactly two " "must be specified") with pytest.raises(ValueError, match=msg): period_range("2011-1-1", "2012-1-1", "B") def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts["2007"] expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 assert (ts[1:3] == 1).all() # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts["2007"] expected = ts[idx == "2007"] tm.assert_series_equal(result, expected) def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN", tz="US/Eastern") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN", tz="US/Eastern") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 def test_shift(self): # This is tested in test_arithmetic pass @td.skip_if_32bit def test_ndarray_compat_properties(self): super().test_ndarray_compat_properties() def test_negative_ordinals(self): Period(ordinal=-1000, freq="A") Period(ordinal=0, freq="A") idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") tm.assert_index_equal(idx1, idx2) def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name") exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") tm.assert_index_equal(idx.year, exp) exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! 
assert s["05Q4"] == s[2] def test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], freq="2M", ) pi = period_range(start="1/1/11", end="12/31/11", freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" pi = period_range(start="1/1/11", periods=6, freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" def test_iteration(self): index = period_range(start="1/1/10", periods=4, freq="B") result = list(index) assert isinstance(result[0], Period) assert result[0].freq == index.freq def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq="A") assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq="A") assert index.is_full index = PeriodIndex([2005, 2005, 2007], freq="A") assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq="A") assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq="A") with pytest.raises(ValueError, match="Index is not monotonic"): index.is_full assert index[:0].is_full def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") index_as_arrays = [index.to_period(freq="D"), index.hour] s = Series([0, 1, 2, 3], index_as_arrays) assert isinstance(s.index.levels[0], PeriodIndex) assert isinstance(s.index.values[0][0], Period) def test_convert_array_of_periods(self): rng = period_range("1/1/2000", periods=20, freq="D") periods = list(rng) result = Index(periods) assert isinstance(result, PeriodIndex) def test_append_concat(self): # #1815 d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") new_prng = tm.round_trip_pickle(prng) assert new_prng.freq == offsets.MonthEnd() assert new_prng.freqstr == "M" def test_map(self): # test_map_dictlike generally tests index = PeriodIndex([2005, 2007, 2009], freq="A") result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) def test_insert(self): # GH 18295 (test missing) expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") for na in (np.nan, NaT, None): result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "msg, key", [ (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), ( r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", (Period(2018), Period(2016), "bar"), ), (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), ( r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", (Period(2017), "foo", Period(2015)), ), (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), ], ) def test_contains_raise_error_if_period_index_is_in_multi_index( self, msg, key): # issue 20684 """ parse_time_string return parameter if type not matched. PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. 
        If the first argument is a Period and the tuple has 3 items, the
        lookup proceeds instead of raising an exception.
        """
        df = DataFrame(
            {
                "A": [Period(2019), "x1", "x2"],
                "B": [Period(2018), Period(2016), "y1"],
                "C": [Period(2017), "z1", Period(2015)],
                "V1": [1, 2, 3],
                "V2": [10, 20, 30],
            }
        ).set_index(["A", "B", "C"])
        with pytest.raises(KeyError, match=msg):
            df.loc[key]
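# A minimal standalone sketch of the behaviour described in the docstring
# above: a complete, existing key tuple on the Period-containing MultiIndex
# resolves to a row, while a tuple whose tail does not exist raises KeyError
# (the case the parametrized test exercises). Illustrative only, not an
# additional test from the original file.
def _example_period_multiindex_lookup():
    from pandas import DataFrame, Period

    df = DataFrame(
        {
            "A": [Period(2019), "x1", "x2"],
            "B": [Period(2018), Period(2016), "y1"],
            "C": [Period(2017), "z1", Period(2015)],
            "V1": [1, 2, 3],
        }
    ).set_index(["A", "B", "C"])
    # full key that exists -> returns the row
    row = df.loc[(Period(2019), Period(2018), Period(2017))]
    assert row["V1"] == 1
    # key whose remaining items do not exist -> KeyError
    try:
        df.loc[(Period(2019), "foo", "bar")]
    except KeyError:
        pass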
class TestSeriesMisc: def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index([f"a{i}" for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. 
s = Series(index=index, dtype=object) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance( x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series(dtype=object) s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) def test_empty_method(self): s_empty = Series(dtype=object) assert s_empty.empty s2 = Series(index=[1], dtype=object) for full_series in [Series([1]), s2]: assert not full_series.empty def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_attrs(self): s = Series([0, 1], name="abc") assert s.attrs == {} s.attrs["version"] = 1 result = s + 1 assert result.attrs == {"version": 1} @skip_if_no("jinja2") def test_inspect_getmembers(self): # GH38782 ser = Series(dtype=object) with tm.assert_produces_warning(None): inspect.getmembers(ser)
pd.date_range("20130101", periods=3, tz="US/Eastern")], names=["one", "two", "three"], ) indices_dict = { "unicode": tm.makeUnicodeIndex(100), "string": tm.makeStringIndex(100), "datetime": tm.makeDateIndex(100), "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), "period": tm.makePeriodIndex(100), "timedelta": tm.makeTimedeltaIndex(100), "int": tm.makeIntIndex(100), "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), "complex64": tm.makeFloatIndex(100).astype("complex64"), "complex128": tm.makeFloatIndex(100).astype("complex128"), "num_int64":
Series(["a", None, "c"]), Series([True, False, True]), Series(dtype=object), Index([1, 2, 3]), Index([True, False, True]), DataFrame({ "x": ["a", "b", "c"], "y": [1, 2, 3] }), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), MultiIndex.from_product([ range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2) ]), MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]), ], ) def test_hash_pandas_object(obj, index): _check_equal(obj, index=index) _check_not_equal_with_index(obj)
    def setup_method(self, method):
        self.bool_index = tm.makeBoolIndex(10, name="a")
        self.int_index = tm.makeIntIndex(10, name="a")
        self.float_index = tm.makeFloatIndex(10, name="a")
        self.dt_index = tm.makeDateIndex(10, name="a")
        self.dt_tz_index = tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")
        self.period_index = tm.makePeriodIndex(10, name="a")
        self.string_index = tm.makeStringIndex(10, name="a")
        self.unicode_index = tm.makeUnicodeIndex(10, name="a")

        arr = np.random.randn(10)
        self.bool_series = Series(arr, index=self.bool_index, name="a")
        self.int_series = Series(arr, index=self.int_index, name="a")
        self.float_series = Series(arr, index=self.float_index, name="a")
        self.dt_series = Series(arr, index=self.dt_index, name="a")
        self.dt_tz_series = self.dt_tz_index.to_series()
        self.period_series = Series(arr, index=self.period_index, name="a")
        self.string_series = Series(arr, index=self.string_index, name="a")
        self.unicode_series = Series(arr, index=self.unicode_index, name="a")

        types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"]
        self.indexes = [getattr(self, f"{t}_index") for t in types]
        self.series = [getattr(self, f"{t}_series") for t in types]

        # To test narrow dtypes, we use narrower *data* elements, not *index* elements
        index = self.int_index
        self.float32_series = Series(arr.astype(np.float32), index=index, name="a")

        arr_int = np.random.choice(10, size=10, replace=False)
        self.int8_series = Series(arr_int.astype(np.int8), index=index, name="a")
        self.int16_series = Series(arr_int.astype(np.int16), index=index, name="a")
        self.int32_series = Series(arr_int.astype(np.int32), index=index, name="a")

        self.uint8_series = Series(arr_int.astype(np.uint8), index=index, name="a")
        self.uint16_series = Series(arr_int.astype(np.uint16), index=index, name="a")
        self.uint32_series = Series(arr_int.astype(np.uint32), index=index, name="a")

        nrw_types = ["float32", "int8", "int16", "int32", "uint8", "uint16", "uint32"]
        self.narrow_series = [getattr(self, f"{t}_series") for t in nrw_types]

        self.objs = self.indexes + self.series + self.narrow_series
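    # A minimal illustration of the comment above (not part of the original
    # class): the "narrow" fixtures vary the *data* dtype while reusing the
    # same integer index, so dtype preservation can be checked per series.
    def _example_narrow_dtype(self):
        import numpy as np
        import pandas as pd

        idx = pd.Index(range(10), name="a")
        ser = pd.Series(np.arange(10, dtype=np.int8), index=idx, name="a")
        # the data dtype stays narrow even though the index is plain int64
        assert ser.dtype == np.int8
        assert ser.index.dtype == np.int64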
    rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    rng = rng[::-1]
    assert rng.inferred_freq == "-1A-JAN"


def test_non_datetime_index2():
    rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    vals = rng.to_pydatetime()
    result = frequencies.infer_freq(vals)
    assert result == rng.inferred_freq


@pytest.mark.parametrize(
    "idx", [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]
)
def test_invalid_index_types(idx):
    msg = (
        "(cannot infer freq from a non-convertible)|"
        "(Check the `freq` attribute instead of using infer_freq)"
    )
    with pytest.raises(TypeError, match=msg):
        frequencies.infer_freq(idx)


@pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue")
@pytest.mark.parametrize("idx", [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)])
def test_invalid_index_types_unicode(idx):
    # see gh-10822
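# A minimal sketch of the happy path for the API under test here: infer_freq
# recovers a frequency string from a regular DatetimeIndex. Illustrative only,
# not part of the original test module.
def _example_infer_freq():
    import pandas as pd

    idx = pd.date_range("2000-01-01", periods=5, freq="D")
    assert pd.infer_freq(idx) == "D"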
class TestSeriesMisc: def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) def test_tab_completion_dt(self): # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) def test_tab_completion_cat(self): # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) def test_tab_completion_cat_str(self): # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] s = Series(list("aabbcde")).astype("category") results = sorted({r for r in s.cat.__dir__() if not r.startswith("_")}) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index([f"a{i}" for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. 
s = Series(index=index, dtype=object) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance( x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s @pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])]) def test_not_hashable(self, ser): msg = "unhashable type: 'Series'" with pytest.raises(TypeError, match=msg): hash(ser) def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) def test_ndarray_compat_like_func(self): # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) def test_ndarray_compat_ravel(self): # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) def test_empty_method(self): s_empty = Series(dtype=object) assert s_empty.empty @pytest.mark.parametrize("dtype", ["int64", object]) def test_empty_method_full_series(self, dtype): full_series = Series(index=[1], dtype=dtype) assert not full_series.empty @pytest.mark.parametrize("dtype", [None, "Int64"]) def test_integer_series_size(self, dtype): # GH 25580 s = Series(range(9), dtype=dtype) assert s.size == 9 def test_attrs(self): s = Series([0, 1], name="abc") assert s.attrs == {} s.attrs["version"] = 1 result = s + 1 assert result.attrs == {"version": 1} @skip_if_no("jinja2") def test_inspect_getmembers(self): # GH38782 ser = Series(dtype=object) # TODO(2.0): Change to None once is_monotonic deprecation # is enforced with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): inspect.getmembers(ser) def test_unknown_attribute(self): # GH#9680 tdi = pd.timedelta_range(start=0, periods=10, freq="1s") ser = Series(np.random.normal(size=10), index=tdi) assert "foo" not in ser.__dict__.keys() msg = "'Series' object has no attribute 'foo'" with pytest.raises(AttributeError, match=msg): ser.foo @pytest.mark.parametrize("op", ["year", "day", "second", "weekday"]) def test_datetime_series_no_datelike_attrs(self, op, datetime_series): # GH#7206 msg = f"'Series' object has no attribute '{op}'" with pytest.raises(AttributeError, match=msg): getattr(datetime_series, op) def test_series_datetimelike_attribute_access(self): # attribute access should still work! 
ser = Series({"year": 2000, "month": 1, "day": 10}) assert ser.year == 2000 assert ser.month == 1 assert ser.day == 10 def test_series_datetimelike_attribute_access_invalid(self): ser = Series({"year": 2000, "month": 1, "day": 10}) msg = "'Series' object has no attribute 'weekday'" with pytest.raises(AttributeError, match=msg): ser.weekday def test_series_iteritems_deprecated(self): ser = Series([1]) with tm.assert_produces_warning(FutureWarning): next(ser.iteritems())
class TestPeriodIndex(DatetimeLike): _index_cls = PeriodIndex @pytest.fixture def simple_index(self) -> Index: return period_range("20130101", periods=5, freq="D") @pytest.fixture( params=[ tm.makePeriodIndex(10), period_range("20130101", periods=10, freq="D")[::-1], ], ids=["index_inc", "index_dec"], ) def index(self, request): return request.param @pytest.mark.xfail(reason="Goes through a generate_range path") def test_pickle_compat_construction(self): super().test_pickle_compat_construction() @pytest.mark.parametrize("freq", ["D", "M", "A"]) def test_pickle_round_trip(self, freq): idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) def test_where(self): # This is handled in test_indexing pass def test_make_time_series(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") series = Series(1, index=index) assert isinstance(series, Series) def test_view_asi8(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = PeriodIndex(["2011-01-01", NaT], freq="D") tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) def test_values(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01-01", NaT], freq="D") exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) def test_period_index_length(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") assert len(pi) == 4 * 9 pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") assert len(pi) == 12 * 9 start = Period("02-Apr-2005", "B") i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period("2006-12-31", "1w") i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq msg = "start and end must have same freq" with pytest.raises(ValueError, match=msg): period_range(start=start, end=end_intv) end_intv = Period("2005-05-01", "B") i1 = period_range(start=start, end=end_intv) msg = ("Of the three parameters: start, end, and periods, exactly two " "must be specified") with pytest.raises(ValueError, match=msg): period_range(start=start) 
# infer freq from first element i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) assert len(i2) == 2 assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) assert len(i2) == 2 assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) vals = np.array(vals) with pytest.raises(ValueError, match=msg): PeriodIndex(vals) def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") self._check_all_fields(pi) pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") self._check_all_fields(pi) pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") self._check_all_fields(pi) pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") self._check_all_fields(pi) pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") self._check_all_fields(pi) pi = period_range(freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00") self._check_all_fields(pi) end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) self._check_all_fields(i1) def _check_all_fields(self, periodindex): fields = [ "year", "month", "day", "hour", "minute", "second", "weekofyear", "week", "dayofweek", "day_of_week", "dayofyear", "day_of_year", "quarter", "qyear", "days_in_month", ] periods = list(periodindex) s = Series(periodindex) for field in fields: field_idx = getattr(periodindex, field) assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): assert getattr(x, field) == val def test_is_(self): create_index = lambda: period_range( freq="A", start="1/1/2001", end="12/1/2009") index = create_index() assert index.is_(index) assert not index.is_(create_index()) assert index.is_(index.view()) assert index.is_(index.view().view().view().view().view()) assert index.view().is_(index) ind2 = index.view() index.name = "Apple" assert ind2.is_(index) assert not index.is_(index[:]) assert not index.is_(index.asfreq("M")) assert not index.is_(index.asfreq("A")) assert not index.is_(index - 2) assert not index.is_(index - 0) def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 def test_shift(self): # This is tested in test_arithmetic pass def test_negative_ordinals(self): Period(ordinal=-1000, freq="A") Period(ordinal=0, freq="A") idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") tm.assert_index_equal(idx1, idx2) def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name") exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") tm.assert_index_equal(idx.year, exp) exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) def 
test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], freq="2M", ) pi = period_range(start="1/1/11", end="12/31/11", freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" pi = period_range(start="1/1/11", periods=6, freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" def test_iteration(self): index = period_range(start="1/1/10", periods=4, freq="B") result = list(index) assert isinstance(result[0], Period) assert result[0].freq == index.freq def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") index_as_arrays = [index.to_period(freq="D"), index.hour] s = Series([0, 1, 2, 3], index_as_arrays) assert isinstance(s.index.levels[0], PeriodIndex) assert isinstance(s.index.values[0][0], Period) def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") new_prng = tm.round_trip_pickle(prng) assert new_prng.freq == offsets.MonthEnd() assert new_prng.freqstr == "M" def test_map(self): # test_map_dictlike generally tests index = PeriodIndex([2005, 2007, 2009], freq="A") result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) def test_format_empty(self): # GH35712 empty_idx = self._index_cls([], freq="A") assert empty_idx.format() == [] assert empty_idx.format(name=True) == [""]
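# A minimal sketch of the ordinal round-trip behind test_map above: for a
# PeriodIndex without missing values, mapping each Period to its .ordinal
# matches the integer view exposed as .asi8. Illustrative only.
def _example_period_ordinals():
    import pandas as pd

    pi = pd.period_range("2000-01", periods=3, freq="M")
    assert list(pi.map(lambda p: p.ordinal)) == list(pi.asi8)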
class TestSeriesMisc: def test_append_preserve_name(self, datetime_series): result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_getitem_preserve_name(self, datetime_series): result = datetime_series[datetime_series > 0] assert result.name == datetime_series.name result = datetime_series[[0, 2, 4]] assert result.name == datetime_series.name result = datetime_series[5:10] assert result.name == datetime_series.name def test_pickle_datetimes(self, datetime_series): unp_ts = self._pickle_roundtrip(datetime_series) tm.assert_series_equal(unp_ts, datetime_series) def test_pickle_strings(self, string_series): unp_series = self._pickle_roundtrip(string_series) tm.assert_series_equal(unp_series, string_series) def _pickle_roundtrip(self, obj): with tm.ensure_clean() as path: obj.to_pickle(path) unpickled = pd.read_pickle(path) return unpickled def test_sparse_accessor_updates_on_inplace(self): s = Series([1, 1, 2, 3], dtype="Sparse[int]") return_value = s.drop([0, 1], inplace=True) assert return_value is None assert s.sparse.density == 1.0 def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index([f"a{i}" for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. 
    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index([f"a{i}" for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = Series(index=index, dtype=object)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        s_empty = Series(dtype=object)
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self, datetime_series):
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_iter_datetimes(self, datetime_series):
        for i, val in enumerate(datetime_series):
            assert val == datetime_series[i]

    def test_iter_strings(self, string_series):
        for i, val in enumerate(string_series):
            assert val == string_series[i]

    def test_keys(self, datetime_series):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = datetime_series.keys
        assert getkeys() is datetime_series.index

    def test_values(self, datetime_series):
        tm.assert_almost_equal(
            datetime_series.values, datetime_series, check_dtype=False
        )

    def test_iteritems_datetimes(self, datetime_series):
        for idx, val in datetime_series.iteritems():
            assert val == datetime_series[idx]

    def test_iteritems_strings(self, string_series):
        for idx, val in string_series.iteritems():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.iteritems(), "reverse")

    def test_items_datetimes(self, datetime_series):
        for idx, val in datetime_series.items():
            assert val == datetime_series[idx]

    def test_items_strings(self, string_series):
        for idx, val in string_series.items():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.items(), "reverse")

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self, datetime_series):
        # it works!
        np.unique(datetime_series)
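    # Hedged sketch, illustrative only and not from the original suite:
    # Series.items() yields (index label, value) pairs, while the Series
    # itself is unhashable because it is mutable -- the behaviours exercised
    # by test_items_* and test_not_hashable above.  Assumes only pandas and
    # the pytest import already used here.
    def sketch_items_and_hashability(self):
        ser = Series([10, 20], index=["a", "b"])
        assert dict(ser.items()) == {"a": 10, "b": 20}
        with pytest.raises(TypeError):
            hash(ser)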
    def test_item(self):
        s = Series([1])
        result = s.item()
        assert result == 1
        assert result == s.iloc[0]
        assert isinstance(result, int)  # i.e. not np.int64

        ser = Series([0.5], index=[3])
        result = ser.item()
        assert isinstance(result, float)
        assert result == 0.5

        ser = Series([1, 2])
        msg = "can only convert an array of size 1"
        with pytest.raises(ValueError, match=msg):
            ser.item()

        dti = pd.date_range("2016-01-01", periods=2)
        with pytest.raises(ValueError, match=msg):
            dti.item()
        with pytest.raises(ValueError, match=msg):
            Series(dti).item()

        val = dti[:1].item()
        assert isinstance(val, Timestamp)
        val = Series(dti)[:1].item()
        assert isinstance(val, Timestamp)

        tdi = dti - dti
        with pytest.raises(ValueError, match=msg):
            tdi.item()
        with pytest.raises(ValueError, match=msg):
            Series(tdi).item()

        val = tdi[:1].item()
        assert isinstance(val, Timedelta)
        val = Series(tdi)[:1].item()
        assert isinstance(val, Timedelta)

        # Case where ser[0] would not work
        ser = Series(dti, index=[5, 6])
        val = ser[:1].item()
        assert val == dti[0]

    def test_ndarray_compat(self):
        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

    def test_str_accessor_updates_on_inplace(self):
        s = Series(list("abc"))
        return_value = s.drop([0], inplace=True)
        assert return_value is None
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            tm.assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = Series(dtype=object)
        assert s_empty.empty

        s2 = Series(index=[1], dtype=object)
        for full_series in [Series([1]), s2]:
            assert not full_series.empty

    @async_mark()
    @td.check_file_leaks
    async def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = Series(dtype=object)"
        await ip.run_code(code)

        # TODO: remove it when Ipython updates
        # GH 33567, jedi version raises Deprecation warning in Ipython
        import jedi

        if jedi.__version__ < "0.17.0":
            warning = tm.assert_produces_warning(None)
        else:
            warning = tm.assert_produces_warning(
                DeprecationWarning, check_stacklevel=False
            )
        with warning:
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9

        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_attrs(self):
        s = Series([0, 1], name="abc")
        assert s.attrs == {}
        s.attrs["version"] = 1
        result = s + 1
        assert result.attrs == {"version": 1}
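    # Hedged sketch, illustrative and not from the original suite: .attrs is
    # a plain dict of metadata that propagates through simple operations such
    # as the addition checked in test_attrs above.  The "source" key is an
    # arbitrary example, not anything pandas defines.
    def sketch_attrs_propagation(self):
        ser = Series([0, 1], name="abc")
        ser.attrs["source"] = "example"
        assert (ser + 1).attrs == {"source": "example"}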
    @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
    def test_set_flags(self, allows_duplicate_labels):
        df = Series([1, 2])
        result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels)
        if allows_duplicate_labels is None:
            # We don't update when it's not provided
            assert result.flags.allows_duplicate_labels is True
        else:
            assert result.flags.allows_duplicate_labels is allows_duplicate_labels

        # We made a copy
        assert df is not result
        # We didn't mutate df
        assert df.flags.allows_duplicate_labels is True

        # But we didn't copy data
        result.iloc[0] = 0
        assert df.iloc[0] == 0

        # Now we do copy.
        result = df.set_flags(
            copy=True, allows_duplicate_labels=allows_duplicate_labels
        )
        result.iloc[0] = 10
        assert df.iloc[0] == 0
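    # Hedged sketch, illustrative and not from the original suite: set_flags
    # returns a new Series but shares the underlying data unless copy=True is
    # passed, which is the behaviour exercised by test_set_flags above.
    # Assumes only the Series import already used in this module.
    def sketch_set_flags_copy(self):
        ser = Series([1, 2])

        shallow = ser.set_flags(allows_duplicate_labels=False)
        shallow.iloc[0] = 0
        assert ser.iloc[0] == 0  # data is shared with the original

        deep = ser.set_flags(copy=True, allows_duplicate_labels=False)
        deep.iloc[0] = 10
        assert ser.iloc[0] == 0  # the copy does not write back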