def test_quantile_multi(self): qs = [.1, .9] result = self.ts.quantile(qs) expected = pd.Series([np.percentile(self.ts.dropna(), 10), np.percentile(self.ts.dropna(), 90)], index=qs, name=self.ts.name) tm.assert_series_equal(result, expected) dts = self.ts.index.to_series() dts.name = 'xxx' result = dts.quantile((.2, .2)) expected = Series([Timestamp('2000-01-10 19:12:00'), Timestamp('2000-01-10 19:12:00')], index=[.2, .2], name='xxx') tm.assert_series_equal(result, expected) result = self.ts.quantile([]) expected = pd.Series([], name=self.ts.name, index=Index( [], dtype=float)) tm.assert_series_equal(result, expected)
def test_operators_datetimelike(self): # ## timedelta64 ### td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan # ## datetime64 ### dt1 = Series([ Timestamp('20111230'), Timestamp('20120101'), Timestamp('20120103') ]) dt1.iloc[2] = np.nan dt2 = Series([ Timestamp('20111231'), Timestamp('20120102'), Timestamp('20120104') ]) dt1 - dt2 dt2 - dt1 # ## datetime64 with timetimedelta ### dt1 + td1 td1 + dt1 dt1 - td1 # TODO: Decide if this ought to work. # td1 - dt1 # ## timetimedelta with datetime64 ### td1 + dt1 dt1 + td1
def test_operators_datetimelike_invalid(self, all_arithmetic_operators): # these are all TypeEror ops op_str = all_arithmetic_operators def check(get_ser, test_ser): # check that we are getting a TypeError # with 'operate' (from core/ops.py) for the ops that are not # defined op = getattr(get_ser, op_str, None) with tm.assert_raises_regex(TypeError, 'operate|cannot'): op(test_ser) # ## timedelta64 ### td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan # ## datetime64 ### dt1 = Series([ Timestamp('20111230'), Timestamp('20120101'), Timestamp('20120103') ]) dt1.iloc[2] = np.nan dt2 = Series([ Timestamp('20111231'), Timestamp('20120102'), Timestamp('20120104') ]) if op_str not in ['__sub__', '__rsub__']: check(dt1, dt2) # ## datetime64 with timetimedelta ### # TODO(jreback) __rsub__ should raise? if op_str not in ['__add__', '__radd__', '__sub__']: check(dt1, td1) # 8260, 10763 # datetime64 with tz tz = 'US/Eastern' dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, tz=tz), name='foo') dt2 = dt1.copy() dt2.iloc[2] = np.nan td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H')) td2 = td1.copy() td2.iloc[1] = np.nan if op_str not in ['__add__', '__radd__', '__sub__', '__rsub__']: check(dt2, td2)
def test_constructor_dict_datetime64_index(self): # GH 9456 dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] values = [42544017.198965244, 1234565, 40512335.181958228, -1] def create_data(constructor): return dict(zip((constructor(x) for x in dates_as_str), values)) data_datetime64 = create_data(np.datetime64) data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d')) data_Timestamp = create_data(Timestamp) expected = Series(values, (Timestamp(x) for x in dates_as_str)) result_datetime64 = Series(data_datetime64) result_datetime = Series(data_datetime) result_Timestamp = Series(data_Timestamp) assert_series_equal(result_datetime64, expected) assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected)
def test_convert_objects(self): s = Series([1., 2, 3], index=['a', 'b', 'c']) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates=False, convert_numeric=True) assert_series_equal(result, s) # force numeric conversion r = s.copy().astype('O') r['a'] = '1' with tm.assert_produces_warning(FutureWarning): result = r.convert_objects(convert_dates=False, convert_numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = '1.' with tm.assert_produces_warning(FutureWarning): result = r.convert_objects(convert_dates=False, convert_numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = 'garbled' expected = s.copy() expected['a'] = np.nan with tm.assert_produces_warning(FutureWarning): result = r.convert_objects(convert_dates=False, convert_numeric=True) assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) s = Series([1, 'na', 3, 4]) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_numeric=True) expected = Series([1, np.nan, 3, 4]) assert_series_equal(result, expected) s = Series([1, '', 3, 4]) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_numeric=True) expected = Series([1, np.nan, 3, 4]) assert_series_equal(result, expected) # dates s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0)]) s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'], dtype='O') with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates=True, convert_numeric=False) expected = Series([Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103')], dtype='M8[ns]') assert_series_equal(result, expected) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=False) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=True) assert_series_equal(result, expected) expected = Series([Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') with tm.assert_produces_warning(FutureWarning): result = s2.convert_objects(convert_dates='coerce', convert_numeric=False) assert_series_equal(result, expected) with tm.assert_produces_warning(FutureWarning): result = s2.convert_objects(convert_dates='coerce', convert_numeric=True) assert_series_equal(result, expected) # preserver all-nans (if convert_dates='coerce') s = Series(['foo', 'bar', 1, 1.0], dtype='O') with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=False) expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object s = Series([1], dtype='float32') with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=False) assert_series_equal(result, s) # r = s.copy() # r[0] = np.nan # result = r.convert_objects(convert_dates=True,convert_numeric=False) # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date for x in 'abcdefghijklmnopqrstuvwxyz': s = Series([x]) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce') assert_series_equal(result, s) s = Series([x.upper()]) with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce') assert_series_equal(result, s)
def test_convert(self): # Tests: All to nans, coerce, true # Test coercion returns correct type s = Series(['a', 'b', 'c']) results = s._convert(datetime=True, coerce=True) expected = Series([lib.NaT] * 3) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([np.nan] * 3) assert_series_equal(results, expected) expected = Series([lib.NaT] * 3, dtype=np.dtype('m8[ns]')) results = s._convert(timedelta=True, coerce=True) assert_series_equal(results, expected) dt = datetime(2001, 1, 1, 0, 0) td = dt - datetime(2000, 1, 1, 0, 0) # Test coercion with mixed types s = Series(['a', '3.1415', dt, td]) results = s._convert(datetime=True, coerce=True) expected = Series([lib.NaT, lib.NaT, dt, lib.NaT]) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([nan, 3.1415, nan, nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True, coerce=True) expected = Series([lib.NaT, lib.NaT, lib.NaT, td], dtype=np.dtype('m8[ns]')) assert_series_equal(results, expected) # Test standard conversion returns original results = s._convert(datetime=True) assert_series_equal(results, s) results = s._convert(numeric=True) expected = Series([nan, 3.1415, nan, nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True) assert_series_equal(results, s) # test pass-through and non-conversion when other types selected s = Series(['1.0', '2.0', '3.0']) results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([1.0, 2.0, 3.0]) assert_series_equal(results, expected) results = s._convert(True, False, True) assert_series_equal(results, s) s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype='O') results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)]) assert_series_equal(results, expected) results = s._convert(datetime=False, numeric=True, timedelta=True) assert_series_equal(results, s) td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) s = Series([td, td], dtype='O') results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([td, td]) assert_series_equal(results, expected) results = s._convert(True, True, False) assert_series_equal(results, s) s = Series([1., 2, 3], index=['a', 'b', 'c']) result = s._convert(numeric=True) assert_series_equal(result, s) # force numeric conversion r = s.copy().astype('O') r['a'] = '1' result = r._convert(numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = '1.' result = r._convert(numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = 'garbled' result = r._convert(numeric=True) expected = s.copy() expected['a'] = nan assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) s = Series([1, 'na', 3, 4]) result = s._convert(datetime=True, numeric=True) expected = Series([1, nan, 3, 4]) assert_series_equal(result, expected) s = Series([1, '', 3, 4]) result = s._convert(datetime=True, numeric=True) assert_series_equal(result, expected) # dates s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0)]) s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'], dtype='O') result = s._convert(datetime=True) expected = Series([Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103')], dtype='M8[ns]') assert_series_equal(result, expected) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) expected = Series([Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True) assert_series_equal(result, expected) result = s2._convert(datetime=True, coerce=True) assert_series_equal(result, expected) s = Series(['foo', 'bar', 1, 1.0], dtype='O') result = s._convert(datetime=True, coerce=True) expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object s = Series([1], dtype='float32') result = s._convert(datetime=True, coerce=True) assert_series_equal(result, s) # r = s.copy() # r[0] = np.nan # result = r._convert(convert_dates=True,convert_numeric=False) # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date expected = Series([lib.NaT]) for x in 'abcdefghijklmnopqrstuvwxyz': s = Series([x]) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) s = Series([x.upper()]) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected)
def test_categorical_delegations(self): # invalid accessor pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat) tm.assert_raises_regex( AttributeError, r"Can only use .cat accessor with a 'category' dtype", lambda: Series([1, 2, 3]).cat) pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat) pytest.raises(AttributeError, lambda: Series([Timestamp('20130101')]).cat) # Series should delegate calls to '.categories', '.codes', '.ordered' # and the methods '.set_categories()' 'drop_unused_categories()' to the # categorical# -*- coding: utf-8 -*- s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["a", "b", "c"]) tm.assert_index_equal(s.cat.categories, exp_categories) s.cat.categories = [1, 2, 3] exp_categories = Index([1, 2, 3]) tm.assert_index_equal(s.cat.categories, exp_categories) exp_codes = Series([0, 1, 2, 0], dtype='int8') tm.assert_series_equal(s.cat.codes, exp_codes) assert s.cat.ordered s = s.cat.as_unordered() assert not s.cat.ordered s.cat.as_ordered(inplace=True) assert s.cat.ordered # reorder s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) s = s.cat.set_categories(["c", "b", "a"]) tm.assert_index_equal(s.cat.categories, exp_categories) tm.assert_numpy_array_equal(s.values.__array__(), exp_values) tm.assert_numpy_array_equal(s.__array__(), exp_values) # remove unused categories s = Series( Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"])) exp_categories = Index(["a", "b"]) exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) s = s.cat.remove_unused_categories() tm.assert_index_equal(s.cat.categories, exp_categories) tm.assert_numpy_array_equal(s.values.__array__(), exp_values) tm.assert_numpy_array_equal(s.__array__(), exp_values) # This method is likely to be confused, so test that it raises an error # on wrong inputs: def f(): s.set_categories([4, 3, 2, 1]) pytest.raises(Exception, f) # right: s.cat.set_categories([4,3,2,1]) # GH18862 (let Series.cat.rename_categories take callables) s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) result = s.cat.rename_categories(lambda x: x.upper()) expected = Series( Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)) tm.assert_series_equal(result, expected)
def test_constructor_with_datetime_tz(self): # 8260 # support datetime64 with tz dr = date_range('20130101', periods=3, tz='US/Eastern') s = Series(dr) assert s.dtype.name == 'datetime64[ns, US/Eastern]' assert s.dtype == 'datetime64[ns, US/Eastern]' assert is_datetime64tz_dtype(s.dtype) assert 'datetime64[ns, US/Eastern]' in str(s) # export result = s.values assert isinstance(result, np.ndarray) assert result.dtype == 'datetime64[ns]' exp = pd.DatetimeIndex(result) exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz) tm.assert_index_equal(dr, exp) # indexing result = s.iloc[0] self.assertEqual( result, Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D')) result = s[0] self.assertEqual( result, Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D')) result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) result = s.iloc[0:1] assert_series_equal(result, Series(dr[0:1])) # concat result = pd.concat([s.iloc[0:1], s.iloc[1:]]) assert_series_equal(result, s) # astype result = s.astype(object) expected = Series(DatetimeIndex(s._values).asobject) assert_series_equal(result, expected) result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz) assert_series_equal(result, s) # astype - datetime64[ns, tz] result = Series(s.values).astype('datetime64[ns, US/Eastern]') assert_series_equal(result, s) result = Series(s.values).astype(s.dtype) assert_series_equal(result, s) result = s.astype('datetime64[ns, CET]') expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET')) assert_series_equal(result, expected) # short str assert 'datetime64[ns, US/Eastern]' in str(s) # formatting with NaT result = s.shift() assert 'datetime64[ns, US/Eastern]' in str(result) assert 'NaT' in str(result) # long str t = Series(date_range('20130101', periods=1000, tz='US/Eastern')) assert 'datetime64[ns, US/Eastern]' in str(t) result = pd.DatetimeIndex(s, freq='infer') tm.assert_index_equal(result, dr) # inference s = Series([ pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific') ]) assert s.dtype == 'datetime64[ns, US/Pacific]' assert lib.infer_dtype(s) == 'datetime64' s = Series([ pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern') ]) assert s.dtype == 'object' assert lib.infer_dtype(s) == 'datetime' # with all NaT s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]') expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern')) assert_series_equal(s, expected)
def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) assert isnull(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous s = Series(iNaT, index=lrange(5)) assert not isnull(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) assert isnull(s).all() s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') assert isnull(s[1]) self.assertEqual(s.dtype, 'M8[ns]') s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') assert isnull(s[1]) self.assertEqual(s.dtype, 'M8[ns]') # GH3416 dates = [ np.datetime64(datetime(2013, 1, 1)), np.datetime64(datetime(2013, 1, 2)), np.datetime64(datetime(2013, 1, 3)), ] s = Series(dates) self.assertEqual(s.dtype, 'M8[ns]') s.iloc[0] = np.nan self.assertEqual(s.dtype, 'M8[ns]') # invalid astypes for t in ['s', 'D', 'us', 'ms']: pytest.raises(TypeError, s.astype, 'M8[%s]' % t) # GH3414 related pytest.raises( TypeError, lambda x: Series(Series(dates).astype('int') / 1000000, dtype='M8[ms]')) pytest.raises(TypeError, lambda x: Series(dates, dtype='datetime64')) # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) self.assertEqual(result[0], datetime(2, 1, 1, 0, 0)) result = Series([datetime(3000, 1, 1)]) self.assertEqual(result[0], datetime(3000, 1, 1, 0, 0)) # don't mix types result = Series([Timestamp('20130101'), 1], index=['a', 'b']) self.assertEqual(result['a'], Timestamp('20130101')) self.assertEqual(result['b'], 1) # GH6529 # coerce datetime64 non-ns properly dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M') values2 = dates.view(np.ndarray).astype('datetime64[ns]') expected = Series(values2, index=dates) for dtype in ['s', 'D', 'ms', 'us', 'ns']: values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) result = Series(values1, dates) assert_series_equal(result, expected) # GH 13876 # coerce to non-ns to object properly expected = Series(values2, index=dates, dtype=object) for dtype in ['s', 'D', 'ms', 'us', 'ns']: values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) result = Series(values1, index=dates, dtype=object) assert_series_equal(result, expected) # leave datetime.date alone dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) series1 = Series(dates2, dates) tm.assert_numpy_array_equal(series1.values, dates2) self.assertEqual(series1.dtype, object) # these will correctly infer a datetime s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001']) self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001']) self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001']) self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001']) self.assertEqual(s.dtype, 'datetime64[ns]') # tz-aware (UTC and other tz's) # GH 8411 dr = date_range('20130101', periods=3) assert Series(dr).iloc[0].tz is None dr = date_range('20130101', periods=3, tz='UTC') assert str(Series(dr).iloc[0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') assert str(Series(dr).iloc[0].tz) == 'US/Eastern' # non-convertible s = Series([1479596223000, -1479590, pd.NaT]) assert s.dtype == 'object' assert s[2] is pd.NaT assert 'NaT' in str(s) # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) assert s.dtype == 'object' assert s[2] is pd.NaT assert 'NaT' in str(s) # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) assert s.dtype == 'object' assert s[2] is np.nan assert 'NaN' in str(s)
def test_categorical_delegations(self): # invalid accessor msg = r"Can only use \.cat accessor with a 'category' dtype" with pytest.raises(AttributeError, match=msg): Series([1, 2, 3]).cat with pytest.raises(AttributeError, match=msg): Series([1, 2, 3]).cat() with pytest.raises(AttributeError, match=msg): Series(["a", "b", "c"]).cat with pytest.raises(AttributeError, match=msg): Series(np.arange(5.0)).cat with pytest.raises(AttributeError, match=msg): Series([Timestamp("20130101")]).cat # Series should delegate calls to '.categories', '.codes', '.ordered' # and the methods '.set_categories()' 'drop_unused_categories()' to the # categorical s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["a", "b", "c"]) tm.assert_index_equal(s.cat.categories, exp_categories) s.cat.categories = [1, 2, 3] exp_categories = Index([1, 2, 3]) tm.assert_index_equal(s.cat.categories, exp_categories) exp_codes = Series([0, 1, 2, 0], dtype="int8") tm.assert_series_equal(s.cat.codes, exp_codes) assert s.cat.ordered s = s.cat.as_unordered() assert not s.cat.ordered s.cat.as_ordered(inplace=True) assert s.cat.ordered # reorder s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) s = s.cat.set_categories(["c", "b", "a"]) tm.assert_index_equal(s.cat.categories, exp_categories) tm.assert_numpy_array_equal(s.values.__array__(), exp_values) tm.assert_numpy_array_equal(s.__array__(), exp_values) # remove unused categories s = Series( Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"])) exp_categories = Index(["a", "b"]) exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) s = s.cat.remove_unused_categories() tm.assert_index_equal(s.cat.categories, exp_categories) tm.assert_numpy_array_equal(s.values.__array__(), exp_values) tm.assert_numpy_array_equal(s.__array__(), exp_values) # This method is likely to be confused, so test that it raises an error # on wrong inputs: msg = "'Series' object has no attribute 'set_categories'" with pytest.raises(AttributeError, match=msg): s.set_categories([4, 3, 2, 1]) # right: s.cat.set_categories([4,3,2,1]) # GH18862 (let Series.cat.rename_categories take callables) s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) result = s.cat.rename_categories(lambda x: x.upper()) expected = Series( Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)) tm.assert_series_equal(result, expected)
class TestSeriesQuantile: def test_quantile(self, datetime_series): q = datetime_series.quantile(0.1) assert q == np.percentile(datetime_series.dropna(), 10) q = datetime_series.quantile(0.9) assert q == np.percentile(datetime_series.dropna(), 90) # object dtype q = Series(datetime_series, dtype=object).quantile(0.9) assert q == np.percentile(datetime_series.dropna(), 90) # datetime64[ns] dtype dts = datetime_series.index.to_series() q = dts.quantile(0.2) assert q == Timestamp("2000-01-10 19:12:00") # timedelta64[ns] dtype tds = dts.diff() q = tds.quantile(0.25) assert q == pd.to_timedelta("24:00:00") # GH7661 result = Series([np.timedelta64("NaT")]).sum() assert result == pd.Timedelta(0) msg = "percentiles should all be in the interval \\[0, 1\\]" for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: with pytest.raises(ValueError, match=msg): datetime_series.quantile(invalid) def test_quantile_multi(self, datetime_series): qs = [0.1, 0.9] result = datetime_series.quantile(qs) expected = Series( [ np.percentile(datetime_series.dropna(), 10), np.percentile(datetime_series.dropna(), 90), ], index=qs, name=datetime_series.name, ) tm.assert_series_equal(result, expected) dts = datetime_series.index.to_series() dts.name = "xxx" result = dts.quantile((0.2, 0.2)) expected = Series( [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")], index=[0.2, 0.2], name="xxx", ) tm.assert_series_equal(result, expected) result = datetime_series.quantile([]) expected = Series( [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64" ) tm.assert_series_equal(result, expected) def test_quantile_interpolation(self, datetime_series): # see gh-10174 # interpolation = linear (default case) q = datetime_series.quantile(0.1, interpolation="linear") assert q == np.percentile(datetime_series.dropna(), 10) q1 = datetime_series.quantile(0.1) assert q1 == np.percentile(datetime_series.dropna(), 10) # test with and without interpolation keyword assert q == q1 def test_quantile_interpolation_dtype(self): # GH #10174 # interpolation = linear (default case) q = Series([1, 3, 4]).quantile(0.5, interpolation="lower") assert q == np.percentile(np.array([1, 3, 4]), 50) assert is_integer(q) q = Series([1, 3, 4]).quantile(0.5, interpolation="higher") assert q == np.percentile(np.array([1, 3, 4]), 50) assert is_integer(q) def test_quantile_nan(self): # GH 13098 s = Series([1, 2, 3, 4, np.nan]) result = s.quantile(0.5) expected = 2.5 assert result == expected # all nan/empty s1 = Series([], dtype=object) cases = [s1, Series([np.nan, np.nan])] for s in cases: res = s.quantile(0.5) assert np.isnan(res) res = s.quantile([0.5]) tm.assert_series_equal(res, Series([np.nan], index=[0.5])) res = s.quantile([0.2, 0.3]) tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3])) @pytest.mark.parametrize( "case", [ [ Timestamp("2011-01-01"), Timestamp("2011-01-02"), Timestamp("2011-01-03"), ], [ Timestamp("2011-01-01", tz="US/Eastern"), Timestamp("2011-01-02", tz="US/Eastern"), Timestamp("2011-01-03", tz="US/Eastern"), ], [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], # NaT [ Timestamp("2011-01-01"), Timestamp("2011-01-02"), Timestamp("2011-01-03"), pd.NaT, ], [ Timestamp("2011-01-01", tz="US/Eastern"), Timestamp("2011-01-02", tz="US/Eastern"), Timestamp("2011-01-03", tz="US/Eastern"), pd.NaT, ], [ pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days"), pd.NaT, ], ], ) def test_quantile_box(self, case): s = Series(case, name="XXX") res = s.quantile(0.5) assert res == case[1] res = s.quantile([0.5]) exp = Series([case[1]], index=[0.5], name="XXX") tm.assert_series_equal(res, exp) def test_datetime_timedelta_quantiles(self): # covers #9694 assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5)) assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5)) def test_quantile_nat(self): res = Series([pd.NaT, pd.NaT]).quantile(0.5) assert res is pd.NaT res = Series([pd.NaT, pd.NaT]).quantile([0.5]) tm.assert_series_equal(res, Series([pd.NaT], index=[0.5])) @pytest.mark.parametrize( "values, dtype", [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")], ) def test_quantile_sparse(self, values, dtype): ser = Series(values, dtype=dtype) result = ser.quantile([0.5]) expected = Series(np.asarray(ser)).quantile([0.5]) tm.assert_series_equal(result, expected) def test_quantile_empty(self): # floats s = Series([], dtype="float64") res = s.quantile(0.5) assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) tm.assert_series_equal(res, exp) # int s = Series([], dtype="int64") res = s.quantile(0.5) assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) tm.assert_series_equal(res, exp) # datetime s = Series([], dtype="datetime64[ns]") res = s.quantile(0.5) assert res is pd.NaT res = s.quantile([0.5]) exp = Series([pd.NaT], index=[0.5]) tm.assert_series_equal(res, exp) @pytest.mark.parametrize("dtype", [int, float, "Int64"]) def test_quantile_dtypes(self, dtype): result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25)) expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25)) if dtype == "Int64": expected = expected.astype("Float64") tm.assert_series_equal(result, expected)