def _wrap_results(result, dtype): """ wrap our results if needed """ if issubclass(dtype.type, np.datetime64): if not isinstance(result, np.ndarray): result = lib.Timestamp(result) else: result = result.view(dtype) elif issubclass(dtype.type, np.timedelta64): if not isinstance(result, np.ndarray): # this is a scalar timedelta result! # we have series convert then take the element (scalar) # as series will do the right thing in py3 (and deal with numpy # 1.6.2 bug in that it results dtype of timedelta64[us] from pandas import Series # coerce float to results if is_float(result): result = int(result) result = Series([result], dtype='timedelta64[ns]') else: result = result.view(dtype) return result
def test_tz_aware_asfreq(self): dr = date_range('2011-12-01','2012-07-20',freq = 'D', tz = 'US/Eastern') s = Series(np.random.randn(len(dr)), index=dr) # it works! s.asfreq('T')
def _delegate_property_get(self, name): from pandas import Series result = getattr(self.values, name) # maybe need to upcast (ints) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype('int64') elif not is_list_like(result): return result result = np.asarray(result) # blow up if we operate on categories if self.orig is not None: result = take_1d(result, self.orig.cat.codes) # return the result as a Series, which is by definition a copy result = Series(result, index=self.index, name=self.name) # setting this object will show a SettingWithCopyWarning/Error result.is_copy = ("modifications to a property of a datetimelike " "object are not supported and are discarded. " "Change values on the original.") return result
def quarter_plot(x, dates=None, ylabel=None, ax=None): """ Seasonal plot of quarterly data Parameters ---------- x : array-like Seasonal data to plot. If dates is None, x must be a pandas object with a PeriodIndex or DatetimeIndex with a monthly frequency. dates : array-like, optional If `x` is not a pandas object, then dates must be supplied. ylabel : str, optional The label for the y-axis. Will attempt to use the `name` attribute of the Series. ax : matplotlib.axes, optional Existing axes instance. Returns ------- matplotlib.Figure """ from pandas import DataFrame if dates is None: from statsmodels.tools.data import _check_period_index _check_period_index(x, freq="Q") else: from pandas import Series, PeriodIndex x = Series(x, index=PeriodIndex(dates, freq="Q")) xticklabels = ['q1', 'q2', 'q3', 'q4'] return seasonal_plot(x.groupby(lambda y : y.quarter), xticklabels, ylabel=ylabel, ax=ax)
def test_tz_aware_asfreq(self): dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=self.tzstr("US/Eastern")) s = Series(np.random.randn(len(dr)), index=dr) # it works! s.asfreq("T")
def test_shift_dst(self): # GH 13926 dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern') s = Series(dates) res = s.shift(0) tm.assert_series_equal(res, s) self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') res = s.shift(1) exp_vals = [NaT] + dates.asobject.values.tolist()[:9] exp = Series(exp_vals) tm.assert_series_equal(res, exp) self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') res = s.shift(-2) exp_vals = dates.asobject.values.tolist()[2:] + [NaT, NaT] exp = Series(exp_vals) tm.assert_series_equal(res, exp) self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') for ex in [10, -10, 20, -20]: res = s.shift(ex) exp = Series([NaT] * 10, dtype='datetime64[ns, US/Eastern]') tm.assert_series_equal(res, exp) self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]')
def test_constructor_series(self): index1 = ['d', 'b', 'a', 'c'] index2 = sorted(index1) s1 = Series([4, 7, -5, 3], index=index1) s2 = Series(s1, index=index2) assert_series_equal(s2, s1.sort_index())
def test_endswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) result = values.str.endswith('foo') exp = Series([False, NA, False, False, True, NA, True]) tm.assert_series_equal(result, exp) # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] rs = strings.str_endswith(mixed, 'f') xp = [False, NA, False, NA, NA, False, NA, NA, NA] tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.endswith('f') tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, u('foo')]) result = values.str.endswith('foo') exp = Series([False, NA, False, False, True, NA, True]) tm.assert_series_equal(result, exp) result = values.str.endswith('foo', na=False) tm.assert_series_equal(result, exp.fillna(False).astype(bool))
def test_constructor_with_datetimelike(self): # 12077 # constructor wwth a datetimelike and NaT for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), date_range('1995-01-01 00:00:00', periods=5, freq='s', tz='US/Eastern'), timedelta_range('1 day', periods=5, freq='s')]: s = Series(dtl) c = Categorical(s) expected = type(dtl)(s) expected.freq = None tm.assert_index_equal(c.categories, expected) tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) # with NaT s2 = s.copy() s2.iloc[-1] = NaT c = Categorical(s2) expected = type(dtl)(s2.dropna()) expected.freq = None tm.assert_index_equal(c.categories, expected) exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) tm.assert_numpy_array_equal(c.codes, exp) result = repr(c) assert 'NaT' in result
def test_valid_dt_with_missing_values(self): from datetime import date, time # GH 8689 s = Series(date_range('20130101', periods=5, freq='D')) s.iloc[2] = pd.NaT for attr in ['microsecond', 'nanosecond', 'second', 'minute', 'hour', 'day']: expected = getattr(s.dt, attr).copy() expected.iloc[2] = np.nan result = getattr(s.dt, attr) tm.assert_series_equal(result, expected) result = s.dt.date expected = Series( [date(2013, 1, 1), date(2013, 1, 2), np.nan, date(2013, 1, 4), date(2013, 1, 5)], dtype='object') tm.assert_series_equal(result, expected) result = s.dt.time expected = Series( [time(0), time(0), np.nan, time(0), time(0)], dtype='object') tm.assert_series_equal(result, expected)
def shiftTs(): dates = [datetime(2014,1,2), datetime(2014,1,3), datetime(2014,1,4), datetime(2014,1,5)] ts1 = Series(np.arange(4)+2, index=dates) #ts1 = ts1/ts1.shift(1) - 1 print (ts1) ts1 = ts1.shift(1, freq='M') print (ts1)
def test_mixed_index_assignment(self): # GH 19860 s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2]) s.at['a'] = 11 assert s.iat[0] == 11 s.at[1] = 22 assert s.iat[3] == 22
def test_timedelta(self): converter = lambda x: pd.to_timedelta(x, unit='ms') s = Series([timedelta(23), timedelta(seconds=5)]) self.assertEqual(s.dtype, 'timedelta64[ns]') result = pd.read_json(s.to_json(), typ='series').apply(converter) assert_series_equal(result, s) s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) self.assertEqual(s.dtype, 'timedelta64[ns]') result = pd.read_json(s.to_json(), typ='series').apply(converter) assert_series_equal(result, s) frame = DataFrame([timedelta(23), timedelta(seconds=5)]) self.assertEqual(frame[0].dtype, 'timedelta64[ns]') assert_frame_equal(frame, pd.read_json(frame.to_json()) .apply(converter)) frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)], 'b': [1, 2], 'c': pd.date_range(start='20130101', periods=2)}) result = pd.read_json(frame.to_json(date_unit='ns')) result['a'] = pd.to_timedelta(result.a, unit='ns') result['c'] = pd.to_datetime(result.c) assert_frame_equal(frame, result)
def test_annual_upsample(self): targets = ['D', 'B', 'M'] for month in MONTHS: ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month) for targ, conv, meth in product(targets, ['start', 'end'], ['ffill', 'bfill']): result = ts.resample(targ, fill_method=meth, convention=conv) expected = result.to_timestamp(targ, how=conv) expected = expected.asfreq(targ, meth).to_period() assert_series_equal(result, expected) df = DataFrame({'a' : ts}) rdf = df.resample('D', fill_method='ffill') exp = df['a'].resample('D', fill_method='ffill') assert_series_equal(rdf['a'], exp) rng = period_range('2000', '2003', freq='A-DEC') ts = Series([1, 2, 3, 4], index=rng) result = ts.resample('M', fill_method='ffill') ex_index = period_range('2000-01', '2003-12', freq='M') expected = ts.asfreq('M', how='start').reindex(ex_index, method='ffill') assert_series_equal(result, expected)
def test_tshift(self): # PeriodIndex ps = tm.makePeriodSeries() shifted = ps.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(unshifted, ps) shifted2 = ps.tshift(freq='B') assert_series_equal(shifted, shifted2) shifted3 = ps.tshift(freq=datetools.bday) assert_series_equal(shifted, shifted3) self.assertRaises(ValueError, ps.tshift, freq='M') # DatetimeIndex shifted = self.ts.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(self.ts, unshifted) shifted2 = self.ts.tshift(freq=self.ts.index.freq) assert_series_equal(shifted, shifted2) inferred_ts = Series(self.ts.values, Index(np.asarray(self.ts.index)), name='ts') shifted = inferred_ts.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(shifted, self.ts.tshift(1)) assert_series_equal(unshifted, inferred_ts) no_freq = self.ts[[0, 5, 7]] self.assertRaises(ValueError, no_freq.tshift)
def test_nat_operations(): # GH 8617 s = Series([0, pd.NaT], dtype='m8[ns]') exp = s[0] assert s.median() == exp assert s.min() == exp assert s.max() == exp
def test_getitem_setitem_datetime_tz_pytz(self): tm._skip_if_no_pytz() from pytz import timezone as tz from pandas import date_range N = 50 # testing with timezone, GH #2785 rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') ts = Series(np.random.randn(N), index=rng) # also test Timestamp tz handling, GH #2789 result = ts.copy() result["1990-01-01 09:00:00+00:00"] = 0 result["1990-01-01 09:00:00+00:00"] = ts[4] assert_series_equal(result, ts) result = ts.copy() result["1990-01-01 03:00:00-06:00"] = 0 result["1990-01-01 03:00:00-06:00"] = ts[4] assert_series_equal(result, ts) # repeat with datetimes result = ts.copy() result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] assert_series_equal(result, ts) result = ts.copy() # comparison dates with datetime MUST be localized! date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) result[date] = 0 result[date] = ts[4] assert_series_equal(result, ts)
def test_astype(self): s = Series(np.random.randn(5), name='foo') for dtype in ['float32', 'float64', 'int64', 'int32']: astyped = s.astype(dtype) self.assertEqual(astyped.dtype, dtype) self.assertEqual(astyped.name, s.name)
def test_repr_unicode(self): s = Series([u'\u03c3'] * 10) repr(s) a = Series([u"\u05d0"] * 1000) a.name = 'title1' repr(a)
def test_append_concat(self): rng = date_range('5/8/2012 1:45', periods=10, freq='5T') ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) result = ts.append(ts) result_df = df.append(df) ex_index = DatetimeIndex(np.tile(rng.values, 2)) tm.assert_index_equal(result.index, ex_index) tm.assert_index_equal(result_df.index, ex_index) appended = rng.append(rng) tm.assert_index_equal(appended, ex_index) appended = rng.append([rng, rng]) ex_index = DatetimeIndex(np.tile(rng.values, 3)) tm.assert_index_equal(appended, ex_index) # different index names rng1 = rng.copy() rng2 = rng.copy() rng1.name = 'foo' rng2.name = 'bar' assert rng1.append(rng1).name == 'foo' assert rng1.append(rng2).name is None
def test_constructor(self): assert self.ts.index.is_all_dates # Pass in Series derived = Series(self.ts) assert derived.index.is_all_dates assert tm.equalContents(derived.index, self.ts.index) # Ensure new index is not created assert id(self.ts.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ assert mixed[1] is np.NaN assert not self.empty.index.is_all_dates assert not Series({}).index.is_all_dates pytest.raises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) pytest.raises(NotImplementedError, Series, m)
def test_first_last_nth_dtypes(df_mixed_floats): df = df_mixed_floats.copy() df['E'] = True df['F'] = 1 # tests for first / last / nth grouped = df.groupby('A') first = grouped.first() expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] expected.index = Index(['bar', 'foo'], name='A') expected = expected.sort_index() assert_frame_equal(first, expected) last = grouped.last() expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] expected.index = Index(['bar', 'foo'], name='A') expected = expected.sort_index() assert_frame_equal(last, expected) nth = grouped.nth(1) expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] expected.index = Index(['bar', 'foo'], name='A') expected = expected.sort_index() assert_frame_equal(nth, expected) # GH 2763, first/last shifting dtypes idx = lrange(10) idx.append(9) s = Series(data=lrange(11), index=idx, name='IntCol') assert s.dtype == 'int64' f = s.groupby(level=0).first() assert f.dtype == 'int64'
def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range('1/3/2000', periods=30, freq='B') ps = Series(np.random.randn(len(rng)), rng) irreg.plot() ps.plot()
def test_upsample_with_limit(self): rng = date_range('1/1/2000', periods=3, freq='5t') ts = Series(np.random.randn(len(rng)), rng) result = ts.resample('t', fill_method='ffill', limit=2) expected = ts.reindex(result.index, method='ffill', limit=2) assert_series_equal(result, expected)
def test_get(): # GH 6383 s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, 51, 39, 55, 43, 54, 52, 51, 54])) result = s.get(25, 0) expected = 0 assert result == expected s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, 51, 39, 55, 43, 54, 52, 51, 54]), index=pd.Float64Index( [25.0, 36.0, 49.0, 64.0, 81.0, 100.0, 121.0, 144.0, 169.0, 196.0, 1225.0, 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, 1681.0, 1764.0, 1849.0, 1936.0], dtype='object')) result = s.get(25, 0) expected = 43 assert result == expected # GH 7407 # with a boolean accessor df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3}) vc = df.i.value_counts() result = vc.get(99, default='Missing') assert result == 'Missing' vc = df.b.value_counts() result = vc.get(False, default='Missing') assert result == 3 result = vc.get(True, default='Missing') assert result == 'Missing'
def test_quarterly_resampling(self): rng = period_range('2000Q1', periods=10, freq='Q-DEC') ts = Series(np.arange(10), index=rng) result = ts.resample('A') exp = ts.to_timestamp().resample('A').to_period() assert_series_equal(result, exp)
def test_join_aware(self): rng = date_range('1/1/2011', periods=10, freq='H') ts = Series(np.random.randn(len(rng)), index=rng) ts_utc = ts.tz_localize('utc') self.assertRaises(Exception, ts.__add__, ts_utc) self.assertRaises(Exception, ts_utc.__add__, ts) test1 = DataFrame(np.zeros((6,3)), index=date_range("2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central")) test2 = DataFrame(np.zeros((3,3)), index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"), columns=range(3,6)) result = test1.join(test2, how='outer') ex_index = test1.index.union(test2.index) self.assertTrue(result.index.equals(ex_index)) self.assertTrue(result.index.tz.zone == 'US/Central') # non-overlapping rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern") result = rng.union(rng2) self.assertTrue(result.tz.zone == 'UTC')
def test_all_values_single_bin(self): # 2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") s = Series(np.random.randn(len(index)), index=index) result = s.resample("A", how='mean') tm.assert_almost_equal(result[0], s.mean())
def test_coercion_with_loc_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) start_series.loc[start_series == start_series[0]] = None expected_series = Series(expected_result) tm.assert_series_equal(start_series, expected_series)
def test_from_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) low.plot() ax = high.plot() expected_h = idxh.to_period().asi8.astype(np.float64) expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], dtype=np.float64) for l in ax.get_lines(): self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines self.assert_numpy_array_equal(xdata, expected_l) else: self.assert_numpy_array_equal(xdata, expected_h) tm.close() # tsplot from pandas.tseries.plotting import tsplot import matplotlib.pyplot as plt tsplot(low, plt.Axes.plot) lines = tsplot(high, plt.Axes.plot) for l in lines: self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines self.assert_numpy_array_equal(xdata, expected_l) else: self.assert_numpy_array_equal(xdata, expected_h)