def test_resample_upsampling_picked_but_not_correct():
    """Resampling at the index's own frequency must not be treated as an
    upsample (GH #3020, GH #5955)."""
    # GH #3020: daily data resampled daily keeps the first bin label.
    daily_idx = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
    ones = Series(1, index=daily_idx)
    assert ones.resample('D').mean().index[0] == daily_idx[0]

    # GH 5955: axis frequency matching the resample frequency must not
    # be mistaken for an upsample.
    noon_stamps = [datetime(1975, 1, day, 12, 0) for day in range(1, 6)]
    s = Series(np.arange(1., 6), index=noon_stamps)
    expected = Series(np.arange(1., 6),
                      index=date_range('19750101', periods=5, freq='D'))
    assert_series_equal(s.resample('D').count(),
                        Series(1, index=expected.index))
    assert_series_equal(s.resample('D').sum(), expected)
    assert_series_equal(s.resample('D').mean(), expected)
def test_resample_upsampling_picked_but_not_correct(self):
    """Resampling at the index's own frequency must not upsample
    (GH #3020, GH #5955).

    NOTE(review): uses the legacy pre-0.18 resample API (``how=`` kwarg,
    resample returning a Series) — will not run on modern pandas.
    """
    # Test for issue #3020
    dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
    series = Series(1, index=dates)
    result = series.resample('D')
    self.assertEqual(result.index[0], dates[0])
    # GH 5955
    # incorrect deciding to upsample when the axis frequency matches the
    # resample frequency
    import datetime
    s = Series(np.arange(1., 6),
               index=[datetime.datetime(1975, 1, i, 12, 0)
                      for i in range(1, 6)])
    expected = Series(np.arange(1., 6),
                      index=date_range('19750101', periods=5, freq='D'))
    result = s.resample('D', how='count')
    assert_series_equal(result, Series(1, index=expected.index))
    result1 = s.resample('D', how='sum')
    result2 = s.resample('D', how='mean')
    result3 = s.resample('D')
    assert_series_equal(result1, expected)
    assert_series_equal(result2, expected)
    assert_series_equal(result3, expected)
def test_resample_bms_2752(self): # GH2753 foo = Series(index=pd.bdate_range('20000101', '20000201')) res1 = foo.resample("BMS").mean() res2 = foo.resample("BMS").mean().resample("B").mean() assert res1.index[0] == Timestamp('20000103') assert res1.index[0] == res2.index[0]
def test_resample_loffset(self):
    """``loffset`` shifts the resampled bin labels; the timedelta, string
    and DateOffset spellings must agree.

    NOTE(review): legacy API — ``how=``/``loffset=`` kwargs,
    ``self.assert_`` and ``DatetimeIndex(start=...)`` all predate modern
    pandas; ``bday`` comes from elsewhere in this module.
    """
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
    s = Series(np.random.randn(14), index=rng)
    result = s.resample('5min', how='mean', closed='right', label='right',
                        loffset=timedelta(minutes=1))
    idx = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                      index=idx + timedelta(minutes=1))
    assert_series_equal(result, expected)
    # the string spelling of loffset must match the timedelta
    expected = s.resample('5min', how='mean', closed='right', label='right',
                          loffset='1min')
    assert_series_equal(result, expected)
    # ...and so must the DateOffset spelling
    expected = s.resample('5min', how='mean', closed='right', label='right',
                          loffset=Minute(1))
    assert_series_equal(result, expected)
    self.assert_(result.index.freq == Minute(5))
    # from daily
    dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
                        freq='D')
    ser = Series(np.random.rand(len(dti)), dti)
    # to weekly
    result = ser.resample('w-sun', how='last')
    expected = ser.resample('w-sun', how='last', loffset=-bday)
    self.assertEqual(result.index[0] - bday, expected.index[0])
def slide12():
    """Demo: build a 100-day random time series and show monthly resampling.

    Fixed: Python 2 ``print`` statements (a SyntaxError under Python 3) and
    the long-removed ``resample(..., how=...)`` keyword, replaced by the
    method-chain form ``resample('M').mean()``.
    """
    rng = pd.date_range('1/1/2000', periods=100, freq='D')
    ts = Series(np.random.randn(len(rng)), index=rng)
    print('timeseries')
    print(ts)
    print('resample')
    print(ts.resample('M').mean())
def test_fill_method_and_how_upsample(self): # GH2073 s = Series(np.arange(9, dtype='int64'), index=date_range('2010-01-01', periods=9, freq='Q')) last = s.resample('M').ffill() both = s.resample('M').ffill().resample('M').last().astype('int64') assert_series_equal(last, both)
def test_resample_loffset(self):
    """``loffset`` shifts resampled bin labels; timedelta/str/offset
    spellings must agree.

    NOTE(review): legacy ``how=``/``loffset=`` API and
    ``DatetimeIndex(start=...)`` — targets an old pandas; ``bday`` comes
    from elsewhere in this module.
    """
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)
    result = s.resample("5min", how="mean", closed="right", label="right",
                        loffset=timedelta(minutes=1))
    idx = date_range("1/1/2000", periods=4, freq="5min")
    expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                      index=idx + timedelta(minutes=1))
    assert_series_equal(result, expected)
    # string spelling of loffset
    expected = s.resample("5min", how="mean", closed="right", label="right",
                          loffset="1min")
    assert_series_equal(result, expected)
    # DateOffset spelling of loffset
    expected = s.resample("5min", how="mean", closed="right", label="right",
                          loffset=Minute(1))
    assert_series_equal(result, expected)
    self.assertEqual(result.index.freq, Minute(5))
    # from daily
    dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
                        freq="D")
    ser = Series(np.random.rand(len(dti)), dti)
    # to weekly
    result = ser.resample("w-sun", how="last")
    expected = ser.resample("w-sun", how="last", loffset=-bday)
    self.assertEqual(result.index[0] - bday, expected.index[0])
def test_resample_tz_localized(self):
    """tz-aware resampling must match resampling the equivalent naive
    series, including bins spanning a DST boundary (GH #2245).

    NOTE(review): legacy API — resample returns a Series here;
    ``lrange`` is a py2/py3 compat helper from this module.
    """
    dr = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(lrange(len(dr)), dr)
    ts_utc = ts.tz_localize("UTC")
    ts_local = ts_utc.tz_convert("America/Los_Angeles")
    result = ts_local.resample("W")
    # strip the tz, resample, re-localize: must agree with the direct path
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [x.replace(tzinfo=None)
                            for x in ts_local_naive.index.to_pydatetime()]
    exp = ts_local_naive.resample("W").tz_localize("America/Los_Angeles")
    assert_series_equal(result, exp)
    # it works
    result = ts_local.resample("D")
    # #2245
    idx = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T",
                     tz="Australia/Sydney")
    s = Series([1, 2], index=idx)
    result = s.resample("D", closed="right", label="right")
    ex_index = date_range("2001-09-21", periods=1, freq="D",
                          tz="Australia/Sydney")
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
    # for good measure
    result = s.resample("D", kind="period")
    ex_index = period_range("2001-09-20", periods=1, freq="D")
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
def test_resample_upsampling_picked_but_not_correct(self):
    """Resampling at the index's own frequency must not upsample
    (GH #3020, GH #5955).

    NOTE(review): legacy API (``how=`` kwarg, nose-style
    ``assertEquals``) — targets an old pandas.
    """
    # Test for issue #3020
    dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D")
    series = Series(1, index=dates)
    result = series.resample("D")
    self.assertEquals(result.index[0], dates[0])
    # GH 5955
    # incorrect deciding to upsample when the axis frequency matches the
    # resample frequency
    import datetime
    s = Series(np.arange(1.0, 6),
               index=[datetime.datetime(1975, 1, i, 12, 0)
                      for i in range(1, 6)])
    expected = Series(np.arange(1.0, 6),
                      index=date_range("19750101", periods=5, freq="D"))
    result = s.resample("D", how="count")
    assert_series_equal(result, Series(1, index=expected.index))
    result1 = s.resample("D", how="sum")
    result2 = s.resample("D", how="mean")
    result3 = s.resample("D")
    assert_series_equal(result1, expected)
    assert_series_equal(result2, expected)
    assert_series_equal(result3, expected)
def test_weekly_resample_buglet(self):
    """'W' must default to week-ending-Sunday, i.e. 'W-SUN' (GH #1327).

    NOTE(review): compares the bare ``resample`` return values directly,
    which only holds on the legacy API where resample returned a Series.
    """
    # #1327
    rng = date_range('1/1/2000', freq='B', periods=20)
    ts = Series(np.random.randn(len(rng)), index=rng)
    resampled = ts.resample('W')
    expected = ts.resample('W-SUN')
    assert_series_equal(resampled, expected)
def resamplingTs():
    """Demo: quarterly resampling of a monthly PeriodIndex series using a
    built-in aggregation plus two custom functions.

    NOTE(review): uses the legacy ``resample(..., how=...)`` API;
    ``resamplingFunction`` and ``returnFunction`` are defined elsewhere in
    this module.
    """
    rng = Series(np.arange(13),
                 index=pd.period_range('2010Q1', '2011Q1', freq='M'))
    print(rng)
    tsQ1 = rng.resample('Q-DEC', how='mean')
    print(tsQ1)
    tsQ2 = rng.resample('Q-DEC', how=resamplingFunction)
    print(tsQ2)
    tsQ3 = rng.resample('Q-DEC', how=returnFunction)
    print(tsQ3)
class ResampleDatetetime64:
    """ASV-style benchmark: resample a datetime64 series to 1 s (GH 7754)."""

    def setup(self):
        # ~65k points spaced 555 ms apart over ten hours.
        idx = date_range(start='2000-01-01 00:00:00',
                         end='2000-01-01 10:00:00', freq='555000U')
        self.dt_ts = Series(5, idx, dtype='datetime64[ns]')

    def time_resample(self):
        self.dt_ts.resample('1S').last()
class TestTimeGrouper(unittest.TestCase):
    """Tests for TimeGrouper-driven annual grouping.

    NOTE(review): legacy stack — ``Series.order`` and the ``how=`` resample
    kwarg were removed in modern pandas.
    """

    def setUp(self):
        # 1000 daily points starting 2000-01-01
        self.ts = Series(np.random.randn(1000),
                         index=date_range('1/1/2000', periods=1000))

    def test_apply(self):
        """Applying through a TimeGrouper matches grouping by year."""
        grouper = TimeGrouper('A', label='right', closed='right')
        grouped = self.ts.groupby(grouper)
        f = lambda x: x.order()[-3:]  # top three values per group
        applied = grouped.apply(f)
        expected = self.ts.groupby(lambda x: x.year).apply(f)
        applied.index = applied.index.droplevel(0)
        expected.index = expected.index.droplevel(0)
        assert_series_equal(applied, expected)

    def test_count(self):
        """Annual resample count matches a groupby-year count with NaNs."""
        self.ts[::3] = np.nan
        grouper = TimeGrouper('A', label='right', closed='right')
        result = self.ts.resample('A', how='count')
        expected = self.ts.groupby(lambda x: x.year).count()
        expected.index = result.index
        assert_series_equal(result, expected)

    def test_numpy_reduction(self):
        """A numpy reduction name routes to the equivalent groupby agg."""
        result = self.ts.resample('A', how='prod', closed='right')
        expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
        expected.index = result.index
        assert_series_equal(result, expected)

    def test_apply_iteration(self):
        # #2300
        N = 1000
        ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
        df = DataFrame({'open': 1, 'close': 2}, index=ind)
        tg = TimeGrouper('M')
        grouper = tg.get_grouper(df)  # Errors
        grouped = df.groupby(grouper, group_keys=False)
        f = lambda df: df['close'] / df['open']
        # it works!
        result = grouped.apply(f)
        self.assertTrue(result.index.equals(df.index))
def test_resample_tz_localized(self):
    """tz-aware resampling matches the equivalent naive path, including
    DST-spanning bins (GH #2245) and dict aggregation (GH 6397)."""
    dr = date_range(start='2012-4-13', end='2012-5-1')
    local = Series(lrange(len(dr)), dr).tz_localize('UTC').tz_convert(
        'America/Los_Angeles')
    result = local.resample('W').mean()
    # Strip the tz, resample, then re-localize: must equal the direct path.
    naive = local.copy()
    naive.index = [stamp.replace(tzinfo=None)
                   for stamp in naive.index.to_pydatetime()]
    exp = naive.resample('W').mean().tz_localize('America/Los_Angeles')
    assert_series_equal(result, exp)

    # smoke test: daily resample works on the tz-aware series
    result = local.resample('D').mean()

    # #2245: two minutes straddling a right-closed daily bin in Sydney
    idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                     tz='Australia/Sydney')
    two = Series([1, 2], index=idx)
    result = two.resample('D', closed='right', label='right').mean()
    expected = Series([1.5], index=date_range('2001-09-21', periods=1,
                                              freq='D',
                                              tz='Australia/Sydney'))
    assert_series_equal(result, expected)

    # for good measure: period output
    result = two.resample('D', kind='period').mean()
    expected = Series([1.5], index=period_range('2001-09-20', periods=1,
                                                freq='D'))
    assert_series_equal(result, expected)

    # GH 6397: comparing an offset that doesn't propagate tz's
    hourly = date_range('1/1/2011', periods=20000, freq='H').tz_localize('EST')
    frame = DataFrame(index=hourly)
    frame['first'] = np.random.randn(len(hourly))
    frame['second'] = np.cumsum(np.random.randn(len(hourly)))
    expected = DataFrame({'first': frame.resample('A').sum()['first'],
                          'second': frame.resample('A').mean()['second']},
                         columns=['first', 'second'])
    result = frame.resample('A').agg(
        {'first': np.sum,
         'second': np.mean}).reindex(columns=['first', 'second'])
    assert_frame_equal(result, expected)
def test_resample_weekly_all_na(self):
    """W-WED data resampled at W-THU is all-NaN unless forward-filled.

    NOTE(review): legacy API — ``fill_method=`` kwarg and ``self.assert_``.
    """
    rng = date_range('1/1/2000', periods=10, freq='W-WED')
    ts = Series(np.random.randn(len(rng)), index=rng)
    result = ts.resample('W-THU')
    self.assert_(result.isnull().all())
    # forward-filling matches asfreq with the same fill method
    result = ts.resample('W-THU', fill_method='ffill')[:-1]
    expected = ts.asfreq('W-THU', method='ffill')
    assert_series_equal(result, expected)
def test_resample_weekly_all_na(self):
    """W-WED data resampled at W-THU is all-NaN unless forward-filled.

    NOTE(review): legacy API — ``fill_method=`` kwarg and ``self.assert_``.
    """
    rng = date_range("1/1/2000", periods=10, freq="W-WED")
    ts = Series(np.random.randn(len(rng)), index=rng)
    result = ts.resample("W-THU")
    self.assert_(result.isnull().all())
    # forward-filling matches asfreq with the same fill method
    result = ts.resample("W-THU", fill_method="ffill")[:-1]
    expected = ts.asfreq("W-THU", method="ffill")
    assert_series_equal(result, expected)
def test_secondary_y_mixed_freq_ts_xlim(self):
    """Plotting a downsample on a secondary y-axis must not change the
    x-limits (GH 3490).

    NOTE(review): ``ts.resample('D').plot`` requires the legacy API where
    resample returned a Series.
    """
    # GH 3490 - mixed frequency timeseries with secondary y
    rng = date_range('2000-01-01', periods=10000, freq='min')
    ts = Series(1, index=rng)
    ax = ts.plot()
    left_before, right_before = ax.get_xlim()
    ts.resample('D').plot(secondary_y=True, ax=ax)
    left_after, right_after = ax.get_xlim()
    # a downsample should not have changed either limit
    self.assertEqual(left_before, left_after)
    self.assertEqual(right_before, right_after)
def test_metadata_propagation_indiv(self):
    """Metadata (``name``) must survive transpose, resample aggregations and
    a custom ``__finalize__`` during concat.

    NOTE(review): temporarily monkey-patches ``Series._metadata`` and
    ``Series.__finalize__`` at class level — order-sensitive, so the code is
    left byte-identical on purpose.
    """
    # check that the metadata matches up on the resulting ops
    o = Series(range(3), range(3))
    o.name = 'foo'
    o2 = Series(range(3), range(3))
    o2.name = 'bar'
    result = o.T
    self.check_metadata(o, result)
    # resample: name must survive every aggregation flavour
    ts = Series(np.random.rand(1000),
                index=date_range('20130101', periods=1000, freq='s'),
                name='foo')
    result = ts.resample('1T').mean()
    self.check_metadata(ts, result)
    result = ts.resample('1T').min()
    self.check_metadata(ts, result)
    result = ts.resample('1T').apply(lambda x: x.sum())
    self.check_metadata(ts, result)
    # stash class-level state we are about to patch
    _metadata = Series._metadata
    _finalize = Series.__finalize__
    Series._metadata = ['name', 'filename']
    o.filename = 'foo'
    o2.filename = 'bar'

    def finalize(self, other, method=None, **kwargs):
        # concat joins the 'filename' metadata of all inputs with '+'
        for name in self._metadata:
            if method == 'concat' and name == 'filename':
                value = '+'.join([getattr(
                    o, name) for o in other.objs if getattr(o, name, None)
                ])
                object.__setattr__(self, name, value)
            else:
                object.__setattr__(self, name, getattr(other, name, None))
        return self

    Series.__finalize__ = finalize
    result = pd.concat([o, o2])
    assert result.filename == 'foo+bar'
    assert result.name is None
    # reset the patched class attributes
    Series._metadata = _metadata
    Series.__finalize__ = _finalize
def date_11():
    """Demo of pandas date offsets (Day, MonthEnd) and offset-based grouping.

    Fixed: Python 2 ``print`` statements (a SyntaxError under Python 3) and
    the removed ``resample(..., how='mean')`` keyword, replaced by
    ``resample('M').mean()``.
    """
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))
    offset = MonthEnd()
    print(offset.rollforward(now))
    print(offset.rollback(now))
    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    # grouping by the rolled-forward month end vs. a month-end resample
    print(ts.groupby(offset.rollforward).mean())
    print(ts.resample('M').mean())
def test_secondary_y_mixed_freq_ts_xlim(self):
    """Downsampled overlay on a secondary axis must not move xlim (GH 3490)."""
    idx = date_range('2000-01-01', periods=10000, freq='min')
    minutely = Series(1, index=idx)
    _, ax = self.plt.subplots()
    minutely.plot(ax=ax)
    xlim_before = ax.get_xlim()
    minutely.resample('D').mean().plot(secondary_y=True, ax=ax)
    xlim_after = ax.get_xlim()
    # a downsample covers the same span, so neither limit may change
    assert xlim_before[0] == xlim_after[0]
    assert xlim_before[1] == xlim_after[1]
def test_resample_base_with_timedeltaindex():
    """``base`` shifts the bin origin for a TimedeltaIndex resample (GH 10530).

    NOTE(review): the ``base`` argument was deprecated in pandas 1.1 and
    removed in 2.0 (superseded by ``offset``/``origin``), so this targets
    older pandas.
    """
    # GH 10530
    rng = timedelta_range(start='0s', periods=25, freq='s')
    ts = Series(np.random.randn(len(rng)), index=rng)
    with_base = ts.resample('2s', base=5).mean()
    without_base = ts.resample('2s').mean()
    # base=5 moves the first bin edge from 0s to 5s
    exp_without_base = timedelta_range(start='0s', end='25s', freq='2s')
    exp_with_base = timedelta_range(start='5s', end='29s', freq='2s')
    tm.assert_index_equal(without_base.index, exp_without_base)
    tm.assert_index_equal(with_base.index, exp_with_base)
def test_closed_left_corner(self):
    """With closed='left' a leading NaN is equivalent to dropping the first
    point, for either label side (GH #1465).

    NOTE(review): legacy ``how=`` resample API.
    """
    # #1465
    s = Series(np.random.randn(21),
               index=date_range(start='1/1/2012 9:30', freq='1min',
                                periods=21))
    s[0] = np.nan
    result = s.resample('10min', how='mean', closed='left', label='right')
    exp = s[1:].resample('10min', how='mean', closed='left', label='right')
    assert_series_equal(result, exp)
    result = s.resample('10min', how='mean', closed='left', label='left')
    exp = s[1:].resample('10min', how='mean', closed='left', label='left')
    assert_series_equal(result, exp)
def test_resample_tz_localized(self):
    """tz-aware resampling matches the naive path; DST-spanning bins
    (GH #2245) and tz-preserving dict aggregation (GH 6397).

    NOTE(review): legacy pre-0.18 API — ``how=`` kwarg and resample
    returning a Series; ``lrange`` is a compat helper from this module.
    """
    dr = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(lrange(len(dr)), dr)
    ts_utc = ts.tz_localize("UTC")
    ts_local = ts_utc.tz_convert("America/Los_Angeles")
    result = ts_local.resample("W")
    # strip the tz, resample, re-localize: must agree with the direct path
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [x.replace(tzinfo=None)
                            for x in ts_local_naive.index.to_pydatetime()]
    exp = ts_local_naive.resample("W").tz_localize("America/Los_Angeles")
    assert_series_equal(result, exp)
    # it works
    result = ts_local.resample("D")
    # #2245
    idx = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T",
                     tz="Australia/Sydney")
    s = Series([1, 2], index=idx)
    result = s.resample("D", closed="right", label="right")
    ex_index = date_range("2001-09-21", periods=1, freq="D",
                          tz="Australia/Sydney")
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
    # for good measure
    result = s.resample("D", kind="period")
    ex_index = period_range("2001-09-20", periods=1, freq="D")
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
    # GH 6397
    # comparing an offset that doesn't propagate tz's
    rng = date_range("1/1/2011", periods=20000, freq="H")
    rng = rng.tz_localize("EST")
    ts = DataFrame(index=rng)
    ts["first"] = np.random.randn(len(rng))
    ts["second"] = np.cumsum(np.random.randn(len(rng)))
    expected = DataFrame(
        {"first": ts.resample("A", how=np.sum)["first"],
         "second": ts.resample("A", how=np.mean)["second"]},
        columns=["first", "second"],
    )
    result = ts.resample("A", how={"first": np.sum,
                                   "second": np.mean}).reindex(
        columns=["first", "second"])
    assert_frame_equal(result, expected)
def test_quarterly_resampling(self):
    """Quarterly -> annual period resample equals the timestamp round trip.

    NOTE(review): legacy API — compares the bare resample return value,
    which only holds where resample returned a Series.
    """
    rng = period_range('2000Q1', periods=10, freq='Q-DEC')
    ts = Series(np.arange(10), index=rng)
    result = ts.resample('A')
    exp = ts.to_timestamp().resample('A').to_period()
    assert_series_equal(result, exp)
def test_resample_ambiguous_time_bin_edge(self): # GH 10117 idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London") expected = Series(np.zeros(len(idx)), index=idx) result = expected.resample('30T').mean() tm.assert_series_equal(result, expected)
def test_upsample_with_limit(self):
    """``fill_method='ffill'`` with ``limit`` matches
    ``reindex(..., method='ffill', limit=...)``.

    NOTE(review): legacy resample kwargs (``fill_method=``/``limit=``).
    """
    rng = date_range('1/1/2000', periods=3, freq='5t')
    ts = Series(np.random.randn(len(rng)), rng)
    result = ts.resample('t', fill_method='ffill', limit=2)
    expected = ts.reindex(result.index, method='ffill', limit=2)
    assert_series_equal(result, expected)
def test_closed_left_corner(self):
    """With closed='left' a leading NaN is equivalent to dropping the first
    point; also pins the expected label index (GH #1465).

    NOTE(review): legacy ``how=`` API and ``self.assert_``.
    """
    # #1465
    s = Series(np.random.randn(21),
               index=date_range(start="1/1/2012 9:30", freq="1min",
                                periods=21))
    s[0] = np.nan
    result = s.resample("10min", how="mean", closed="left", label="right")
    exp = s[1:].resample("10min", how="mean", closed="left", label="right")
    assert_series_equal(result, exp)
    result = s.resample("10min", how="mean", closed="left", label="left")
    exp = s[1:].resample("10min", how="mean", closed="left", label="left")
    # left labels start at the first timestamp
    ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
    self.assert_(result.index.equals(ex_index))
    assert_series_equal(result, exp)
def test_upsample_with_limit(self):
    """Annual-period upsample to monthly with a limited forward fill.

    NOTE(review): legacy resample kwargs (``fill_method=``/``limit=``).
    """
    rng = period_range("1/1/2000", periods=5, freq="A")
    ts = Series(np.random.randn(len(rng)), rng)
    result = ts.resample("M", fill_method="ffill", limit=2,
                         convention="end")
    expected = ts.asfreq("M").reindex(result.index, method="ffill",
                                      limit=2)
    assert_series_equal(result, expected)
def test_all_values_single_bin(self):
    """One year of monthly periods resampled annually collapses to a single
    bin whose mean equals the series mean (GH 2070).

    NOTE(review): legacy ``how=`` kwarg.
    """
    # 2070
    index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
    s = Series(np.random.randn(len(index)), index=index)
    result = s.resample("A", how='mean')
    tm.assert_almost_equal(result[0], s.mean())
def test_annual_upsample(self):
    """Annual-period upsampling across target frequencies, conventions and
    fill methods; the frame column must match the series path.

    NOTE(review): legacy ``fill_method=`` API; ``MONTHS``, ``product`` and
    ``_simple_pts`` come from elsewhere in this module. Indentation of the
    DataFrame section reconstructed from the collapsed source.
    """
    targets = ['D', 'B', 'M']
    for month in MONTHS:
        ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)
        for targ, conv, meth in product(targets, ['start', 'end'],
                                        ['ffill', 'bfill']):
            result = ts.resample(targ, fill_method=meth, convention=conv)
            # round-trip through timestamps must reproduce the result
            expected = result.to_timestamp(targ, how=conv)
            expected = expected.asfreq(targ, meth).to_period()
            assert_series_equal(result, expected)
    df = DataFrame({'a': ts})
    rdf = df.resample('D', fill_method='ffill')
    exp = df['a'].resample('D', fill_method='ffill')
    assert_series_equal(rdf['a'], exp)
    rng = period_range('2000', '2003', freq='A-DEC')
    ts = Series([1, 2, 3, 4], index=rng)
    result = ts.resample('M', fill_method='ffill')
    ex_index = period_range('2000-01', '2003-12', freq='M')
    expected = ts.asfreq('M', how='start').reindex(ex_index,
                                                   method='ffill')
    assert_series_equal(result, expected)
def test_resample_size():
    """Resampler.size() matches a manual bincount over the same bins."""
    n = 10000
    dr = date_range('2015-09-19', periods=n, freq='T')
    ts = Series(np.random.randn(n), index=np.random.choice(dr, n))
    observed = ts.resample('7T').size()
    # rebuild the bin edges and count by hand
    bin_edges = date_range(start=observed.index.min(), end=ts.index.max(),
                           freq='7T')
    positions = np.searchsorted(bin_edges.values, ts.index.values,
                                side='right')
    counts = np.bincount(positions,
                         minlength=len(bin_edges) + 1)[1:].astype('int64',
                                                                  copy=False)
    assert_series_equal(observed, Series(counts, index=bin_edges))
def test_resample_upsample():
    """Daily -> minutely upsample by padding keeps endpoints and index name.

    Fixed: ``Resampler.pad()`` (removed in pandas 2.0) replaced by the
    equivalent ``ffill()``; positional lookups use ``.iloc`` instead of the
    deprecated label-fallback ``s[0]`` / ``s[-1]``.
    """
    # from daily
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
                     freq='D', name='index')
    s = Series(np.random.rand(len(dti)), dti)

    # to minutely, by padding
    result = s.resample('Min').ffill()
    # 9 full days * 1440 minutes + the final endpoint
    assert len(result) == 12961
    assert result.iloc[0] == s.iloc[0]
    assert result.iloc[-1] == s.iloc[-1]
    assert result.index.name == 'index'
def test_resample_how_method():
    """resample('10S').mean() inserts NaN for empty bins (GH9915).

    Fixed: ``np.NaN`` (alias removed in NumPy 2.0) -> ``np.nan``, and the
    expected index is built with ``date_range`` so it carries the '10S'
    frequency that ``assert_series_equal`` verifies.
    """
    s = Series([11, 22],
               index=[Timestamp('2015-03-31 21:48:52.672000'),
                      Timestamp('2015-03-31 21:49:52.739000')])
    # two points a minute apart -> five empty 10 s bins between them
    expected = Series([11., np.nan, np.nan, np.nan, np.nan, np.nan, 22.],
                      index=date_range('2015-03-31 21:48:50', periods=7,
                                       freq='10S'))
    assert_series_equal(s.resample("10S").mean(), expected)
def test_corner_cases(simple_period_range_series, simple_date_range_series):
    """Miscellaneous resampling coverage: shifted left labels, empty input,
    and timestamp -> period output."""
    rng = date_range('1/1/2000', periods=12, freq='t')
    ts = Series(np.random.randn(len(rng)), index=rng)
    result = ts.resample('5t', closed='right', label='left').mean()
    # right-closed/left-labelled bins start one interval early
    tm.assert_index_equal(result.index,
                          date_range('1999-12-31 23:55', periods=4,
                                     freq='5t'))

    # an empty series resamples to an empty result
    empty = simple_period_range_series('2007-01', '2010-05', freq='M')[:0]
    assert len(empty.resample('A-DEC').mean()) == 0

    # resample to periods
    hourly = simple_date_range_series('2000-04-28', '2000-04-30 11:00',
                                      freq='h')
    result = hourly.resample('M', kind='period').mean()
    assert len(result) == 1
    assert result.index[0] == Period('2000-04', freq='M')
def test_resample_nunique_with_date_gap():
    """count/nunique/agg variants agree when all values are unique,
    even across a gap in the index (GH 13453)."""
    first = pd.date_range("1-1-2000", "2-15-2000", freq="h")
    second = pd.date_range("4-15-2000", "5-15-2000", freq="h")
    combined = first.append(second)
    s = Series(range(len(combined)), index=combined, dtype="int64")
    r = s.resample("M")
    # every element is unique, so every flavour of nunique equals the count
    baseline = r.count()
    for other in (r.nunique(), r.agg(Series.nunique), r.agg("nunique")):
        tm.assert_series_equal(baseline, other)
def test_resample_loffset(loffset):
    """``loffset`` shifts resampled labels by one minute regardless of its
    spelling; parametrized over timedelta/str/offset forms (GH 7687).

    NOTE(review): the ``loffset`` argument was removed in pandas 2.0, so
    this targets older pandas; ``loffset`` is a pytest fixture parameter.
    """
    # GH 7687
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)
    result = s.resample("5min", closed="right", label="right",
                        loffset=loffset).mean()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    expected = Series(
        [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
        index=idx + timedelta(minutes=1),
    )
    tm.assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)
    # from daily
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
                     freq="D")
    ser = Series(np.random.rand(len(dti)), dti)
    # to weekly
    result = ser.resample("w-sun").last()
    business_day_offset = BDay()
    expected = ser.resample("w-sun", loffset=-business_day_offset).last()
    assert result.index[0] - business_day_offset == expected.index[0]
def test_nanosecond_resample_error():
    """Nano-frequency resample must not drop values past the last bin
    (GH 12307).

    Fixed: ``agg("mean")`` returns float64, but the expected series was
    built from ``range`` as int64 — ``assert_series_equal`` checks dtype,
    so the expected values are now built as float.
    """
    # Values falls after last bin when resampling with offsets.Nano
    start = 1443707890427
    exp_start = 1443707890400
    indx = pd.date_range(start=pd.to_datetime(start), periods=10,
                         freq="100n")
    ts = Series(range(len(indx)), index=indx)
    r = ts.resample(pd.tseries.offsets.Nano(100))
    result = r.agg("mean")
    exp_indx = pd.date_range(start=pd.to_datetime(exp_start), periods=10,
                             freq="100n")
    exp = Series(range(len(exp_indx)), index=exp_indx, dtype=float)
    tm.assert_series_equal(result, exp)
def test_api_compat_before_use(attr): # make sure that we are setting the binner # on these attributes rng = date_range("1/1/2012", periods=100, freq="S") ts = Series(np.arange(len(rng)), index=rng) rs = ts.resample("30s") # before use getattr(rs, attr) # after grouper is initialized is ok rs.mean() getattr(rs, attr)
def test_upsampling_ohlc(self, freq, period_mult, kind):
    """Period OHLC upsampling agrees with the timestamp round trip
    (GH 13083); parametrized over freq/period_mult/kind.

    NOTE(review): ``PeriodIndex(start=..., periods=...)`` was removed from
    modern pandas (use ``period_range``) — targets older pandas.
    """
    # GH 13083
    pi = PeriodIndex(start='2000', freq='D', periods=10)
    s = Series(range(len(pi)), index=pi)
    expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
    # timestamp-based resampling doesn't include all sub-periods
    # of the last original period, so extend accordingly:
    new_index = PeriodIndex(start='2000', freq=freq,
                            periods=period_mult * len(pi))
    expected = expected.reindex(new_index)
    result = s.resample(freq, kind=kind).ohlc()
    assert_frame_equal(result, expected)
def test_resample_with_pytz(self):
    """Daily mean over tz-aware hourly data keeps the timezone (GH 13238)."""
    hourly = Series(2, index=pd.date_range('2017-01-01', periods=48,
                                           freq="H", tz="US/Eastern"))
    result = hourly.resample("D").mean()
    expected = Series(2,
                      index=pd.DatetimeIndex(['2017-01-01', '2017-01-02'],
                                             tz="US/Eastern"))
    assert_series_equal(result, expected)
    # Especially assert that the timezone is LMT for pytz
    assert result.index.tz == pytz.timezone('US/Eastern')
def test_metadata_propagation_indiv(self):
    """Series ``name`` metadata must survive transpose and resample.

    NOTE(review): legacy ``how=`` resample API — resample here returns a
    Series directly; ``check_metadata`` is defined on the test class
    elsewhere.
    """
    # check that the metadata matches up on the resulting ops
    o = Series(range(3), range(3))
    o.name = 'foo'
    o2 = Series(range(3), range(3))
    o2.name = 'bar'
    result = o.T
    self.check_metadata(o, result)
    # resample: name must survive the default, named and callable hows
    ts = Series(np.random.rand(1000),
                index=date_range('20130101', periods=1000, freq='s'),
                name='foo')
    result = ts.resample('1T')
    self.check_metadata(ts, result)
    result = ts.resample('1T', how='min')
    self.check_metadata(ts, result)
    result = ts.resample('1T', how=lambda x: x.sum())
    self.check_metadata(ts, result)
def test_resample_tz_localized(self):
    """tz-aware resampling must match resampling the equivalent naive
    series, including bins spanning a DST boundary (GH #2245).

    NOTE(review): legacy API — resample returns a Series here; ``lrange``
    is a py2/py3 compat helper from this module.
    """
    dr = date_range(start='2012-4-13', end='2012-5-1')
    ts = Series(lrange(len(dr)), dr)
    ts_utc = ts.tz_localize('UTC')
    ts_local = ts_utc.tz_convert('America/Los_Angeles')
    result = ts_local.resample('W')
    # strip the tz, resample, re-localize: must agree with the direct path
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [x.replace(tzinfo=None)
                            for x in ts_local_naive.index.to_pydatetime()]
    exp = ts_local_naive.resample('W').tz_localize('America/Los_Angeles')
    assert_series_equal(result, exp)
    # it works
    result = ts_local.resample('D')
    # #2245
    idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                     tz='Australia/Sydney')
    s = Series([1, 2], index=idx)
    result = s.resample('D', closed='right', label='right')
    ex_index = date_range('2001-09-21', periods=1, freq='D',
                          tz='Australia/Sydney')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
    # for good measure
    result = s.resample('D', kind='period')
    ex_index = period_range('2001-09-20', periods=1, freq='D')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
class TestTimeGrouper(unittest.TestCase):
    """Tests for TimeGrouper-driven annual grouping.

    NOTE(review): legacy stack — ``Series.order`` and the ``how=`` resample
    kwarg were removed in modern pandas.
    """

    def setUp(self):
        # 1000 daily points starting 2000-01-01
        self.ts = Series(np.random.randn(1000),
                         index=date_range('1/1/2000', periods=1000))

    def test_apply(self):
        """Applying through a TimeGrouper matches grouping by year."""
        grouper = TimeGrouper('A', label='right', closed='right')
        grouped = self.ts.groupby(grouper)
        f = lambda x: x.order()[-3:]  # top three values per group
        applied = grouped.apply(f)
        expected = self.ts.groupby(lambda x: x.year).apply(f)
        applied.index = applied.index.droplevel(0)
        expected.index = expected.index.droplevel(0)
        assert_series_equal(applied, expected)

    def test_count(self):
        """Annual resample count matches a groupby-year count with NaNs."""
        self.ts[::3] = np.nan
        grouper = TimeGrouper('A', label='right', closed='right')
        result = self.ts.resample('A', how='count')
        expected = self.ts.groupby(lambda x: x.year).count()
        expected.index = result.index
        assert_series_equal(result, expected)

    def test_numpy_reduction(self):
        """A numpy reduction name routes to the equivalent groupby agg."""
        result = self.ts.resample('A', how='prod', closed='right')
        expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
        expected.index = result.index
        assert_series_equal(result, expected)
def test_resample_upsample(self):
    """Daily -> minutely upsample by padding keeps endpoints and index name.

    NOTE(review): legacy API — ``fill_method=`` kwarg,
    ``DatetimeIndex(start=...)`` and nose-style ``assertEquals``.
    """
    # from daily
    dti = DatetimeIndex(
        start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
        freq='D', name='index')
    s = Series(np.random.rand(len(dti)), dti)
    # to minutely, by padding
    result = s.resample('Min', fill_method='pad')
    # 9 full days * 1440 minutes + the final endpoint
    self.assertEquals(len(result), 12961)
    self.assertEquals(result[0], s[0])
    self.assertEquals(result[-1], s[-1])
    self.assertEqual(result.index.name, 'index')
def test_api_compat_before_use():
    """Grouping attributes must set the binner lazily: accessible both
    before and after the first aggregation."""
    for attr in ('groups', 'ngroups', 'indices'):
        idx = pd.date_range('1/1/2012', periods=100, freq='S')
        ts = Series(np.arange(len(idx)), index=idx)
        resampler = ts.resample('30s')
        getattr(resampler, attr)  # before any aggregation
        resampler.mean()          # initializes the grouper
        getattr(resampler, attr)  # still fine afterwards
def test_annual_upsample(self, simple_period_range_series):
    """Annual -> daily/monthly ffill upsampling: the frame column must
    match the series path, and monthly labels match asfreq+reindex."""
    annual = simple_period_range_series("1/1/1990", "12/31/1995",
                                        freq="A-DEC")
    frame = DataFrame({"a": annual})
    upsampled_frame = frame.resample("D").ffill()
    upsampled_series = frame["a"].resample("D").ffill()
    tm.assert_series_equal(upsampled_frame["a"], upsampled_series)

    rng = period_range("2000", "2003", freq="A-DEC")
    four = Series([1, 2, 3, 4], index=rng)
    result = four.resample("M").ffill()
    ex_index = period_range("2000-01", "2003-12", freq="M")
    expected = four.asfreq("M", how="start").reindex(ex_index,
                                                     method="ffill")
    tm.assert_series_equal(result, expected)
def test_resample_anchored_ticks(self):
    """Tick frequencies that evenly divide a day anchor the bin origin at
    midnight rather than at the first timestamp.

    NOTE(review): legacy API — compares the bare resample return values,
    which only holds where resample returned a Series.
    """
    # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
    # "anchor" the origin at midnight so we get regular intervals rather
    # than starting from the first timestamp which might start in the middle
    # of a desired interval
    rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s')
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts[:2] = np.nan  # so results are the same
    freqs = ['t', '5t', '15t', '30t', '4h', '12h']
    for freq in freqs:
        result = ts[2:].resample(freq, closed='left', label='left')
        expected = ts.resample(freq, closed='left', label='left')
        assert_series_equal(result, expected)
def test_resample_loffset_upsample():
    """``loffset`` also applies when upsampling with ffill (GH 20744).

    NOTE(review): written for pandas 1.x where ``loffset`` raised a
    FutureWarning; the argument was removed in pandas 2.0.
    """
    # GH 20744
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)
    with tm.assert_produces_warning(FutureWarning):
        result = s.resample("5min", closed="right", label="right",
                            loffset=timedelta(minutes=1)).ffill()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    # right-closed ffill picks the last value at/before each bin edge
    expected = Series([s[0], s[5], s[10], s[-1]],
                      index=idx + timedelta(minutes=1))
    tm.assert_series_equal(result, expected)
def slide6():
    """Demo: an irregular time series, daily resampling, and the main
    ``date_range`` options (start/end/periods/freq/normalize).

    Fixed: Python 2 ``print`` statements (a SyntaxError under Python 3).
    """
    dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
             datetime(2011, 1, 7), datetime(2011, 1, 8),
             datetime(2011, 1, 10), datetime(2011, 1, 12)]
    ts = Series(np.random.randn(6), index=dates)
    print(ts)
    # NOTE: on modern pandas this prints the Resampler object itself;
    # chain an aggregation (e.g. .mean()) to see resampled values.
    print(ts.resample('D'))
    index = pd.date_range('4/1/2012', '6/1/2012')
    print(index)
    print('start')
    print(pd.date_range(start='4/1/2012', periods=20))
    print('end')
    print(pd.date_range(end='6/1/2012', periods=20))
    print('business end of month')
    print(pd.date_range('1/1/2000', '12/1/2000', freq='BM'))
    print(pd.date_range('5/2/2012 12:56:31', periods=5))
    print('normalize')
    print(pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True))
def make_qtrly(s: pd.Series, t: str = 'first', name: str = None) -> pd.Series:
    """Collapse a (typically monthly) series to quarterly frequency.

    Args:
        s: input series with a datetime-like index (mutated in place:
           its index is normalized to a DatetimeIndex).
        t: aggregation to apply per quarter — 'mean', 'first' or 'last'.
        name: label used in the gap-filling diagnostic; falls back to
           ``s.name`` or ''.

    Returns:
        Float64 series labelled at quarter ends, forward-filled over gaps.

    Fixed two bugs in the original:
      * ``pd.DatetimeIndex(..., dtype=dt.date)`` passed an invalid dtype.
      * The "conform to quarter end" loop called ``Timestamp.replace`` and
        discarded the result (Timestamps are immutable, ``replace`` returns
        a new value), so the index was never actually changed.
    """
    s.index = pd.DatetimeIndex(s.index.values)
    s.index.freq = s.index.inferred_freq
    name = name or s.name or ''

    if t == 'mean':
        s = s.resample('1Q').mean().astype(np.float64)
    elif t == 'first':
        s = s.resample('1Q').first().astype(np.float64)
    elif t == 'last':
        s = s.resample('1Q').last().astype(np.float64)

    if s.isnull().any():
        print(
            f'Series {name} still has some empty data. Filling that in with the last known value.'
        )
        s.fillna(method='ffill', inplace=True)

    # Conform every label to the last day of its quarter. (resample('1Q')
    # already labels at quarter end, so this is a safety net for any index
    # that slipped through unaggregated.)
    conformed = []
    for v in s.index:
        qtr_end_month = math.ceil(v.month / 3) * 3
        last_day = calendar.monthrange(v.year, qtr_end_month)[-1]
        conformed.append(v.replace(month=qtr_end_month, day=last_day))
    s.index = pd.DatetimeIndex(conformed)
    return s
def test_resample_anchored_ticks():
    """Tick frequencies that evenly divide a day anchor bins at midnight."""
    # If a fixed delta (5 minute, 4 hour) evenly divides a day, the origin
    # is "anchored" at midnight, not at the first timestamp — so dropping
    # the first two (NaN) points must not change any aggregate.
    rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s")
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts[:2] = np.nan  # so both slices aggregate identically
    for freq in ("t", "5t", "15t", "30t", "4h", "12h"):
        tm.assert_series_equal(
            ts[2:].resample(freq, closed="left", label="left").mean(),
            ts.resample(freq, closed="left", label="left").mean(),
        )
def test_rolling_max_resample():
    """rolling(window=1).max() over daily aggregates of data whose last day
    holds three intraday points (4, 10 and 20)."""
    stamps = [datetime(1975, 1, d) for d in range(1, 6)]
    # three datapoints on the last day (4, 10, and 20)
    stamps.append(datetime(1975, 1, 5, 1))
    stamps.append(datetime(1975, 1, 5, 2))
    series = Series(list(range(0, 5)) + [10, 20], index=stamps)
    # use floats and sort chronologically
    series = series.map(float).sort_index()

    daily_index = DatetimeIndex(
        [datetime(1975, 1, d, 0) for d in range(1, 6)], freq="D")

    # default max: the last day collapses to 20
    tm.assert_series_equal(
        Series([0.0, 1.0, 2.0, 3.0, 20.0], index=daily_index),
        series.resample("D").max().rolling(window=1).max())

    # median: the last day collapses to 10
    tm.assert_series_equal(
        Series([0.0, 1.0, 2.0, 3.0, 10.0], index=daily_index),
        series.resample("D").median().rolling(window=1).max())

    # mean: (4 + 10 + 20) / 3
    last_mean = (4.0 + 10.0 + 20.0) / 3.0
    tm.assert_series_equal(
        Series([0.0, 1.0, 2.0, 3.0, last_mean], index=daily_index),
        series.resample("D").mean().rolling(window=1).max())
def test_resample_basic(self):
    idx = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
                     name='index')
    ser = Series(np.random.randn(14), index=idx)

    # right-closed / right-labeled bins
    got = ser.resample('5min', how='mean', closed='right', label='right')
    want = Series(
        [ser[0], ser[1:6].mean(), ser[6:11].mean(), ser[11:].mean()],
        index=date_range('1/1/2000', periods=4, freq='5min'),
    )
    assert_series_equal(got, want)
    self.assertEqual(got.index.name, 'index')

    # left-closed / right-labeled bins
    got = ser.resample('5min', how='mean', closed='left', label='right')
    want = Series(
        [ser[:5].mean(), ser[5:10].mean(), ser[10:].mean()],
        index=date_range('1/1/2000 00:05', periods=3, freq='5min'),
    )
    assert_series_equal(got, want)

    # 'last' aggregation must match an equivalent groupby
    base = self.series
    got = base.resample('5Min', how='last')
    grouped = base.groupby(TimeGrouper(Minute(5), closed='left', label='left'))
    want = grouped.agg(lambda x: x[-1])
    assert_series_equal(got, want)
def test_resample_float_base():
    # GH25161
    stamps = pd.to_datetime([
        "2018-11-26 16:17:43.51",
        "2018-11-26 16:17:44.51",
        "2018-11-26 16:17:45.51",
    ])
    ser = Series(np.arange(3), index=stamps)

    fractional_base = 17 + 43.51 / 60
    with tm.assert_produces_warning(FutureWarning):
        got = ser.resample("3min", base=fractional_base).size()

    want = Series(3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"],
                                            freq="3min"))
    tm.assert_series_equal(got, want)
def resample(series: pd.Series, freq: str, method: str = 'mean') -> pd.Series:
    """Resamples its input series using `freq` and the aggregation method
    `method` (as described in the pandas documentation).

    Example:
    `(resample (series "hourly") "D")`

    Parameters
    ----------
    series : pd.Series
        Datetime-indexed series to resample.
    freq : str
        pandas offset alias, e.g. ``'D'`` or ``'5min'``.
    method : str
        Name of a Resampler aggregation (``'mean'``, ``'sum'``, ...).

    Raises
    ------
    ValueError
        If ``method`` does not name a callable on the Resampler.
    """
    resampled = series.resample(freq)
    # Validate the method name, then call the bound method directly.
    # (Previously the getattr result was discarded and the name was routed
    # through `.apply`, which re-resolved the same string.)
    meth = getattr(resampled, method, None)
    if meth is None or not callable(meth):
        raise ValueError(f'bad resampling method `{method}`')
    return meth()
def test_annual_upsample(self):
    # DataFrame column and the bare Series must upsample identically
    annual = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')
    frame = DataFrame({'a': annual})
    frame_up = frame.resample('D', fill_method='ffill')
    col_up = frame['a'].resample('D', fill_method='ffill')
    assert_series_equal(frame_up['a'], col_up)

    # Period-indexed variant: annual -> monthly with forward fill
    years = period_range('2000', '2003', freq='A-DEC')
    annual = Series([1, 2, 3, 4], index=years)
    got = annual.resample('M', fill_method='ffill')
    months = period_range('2000-01', '2003-12', freq='M')
    want = annual.asfreq('M', how='start').reindex(months, method='ffill')
    assert_series_equal(got, want)
def reaggregate_ipws(ipws, fun=npsum, freq='H', rule='D'):
    """
    Resample a sequence of IPW objects into coarser time bins using `fun`;
    only sum is supported.

    `freq` corresponds to the actual frequency of the ipws; rule
    corresponds to one of the resampling 'rules' given here:
    http://pandas.pydata.org/pandas-docs/dev/timeseries.html#time-date-components

    Returns a list of new IPW objects, one per resampled interval, each
    carrying the aggregated data frame and metadata copied from the first
    input IPW.
    """
    # NOTE(review): `fun.func_name` is a Python 2 attribute (Python 3 uses
    # `__name__`); the message expression is only evaluated when the assert
    # fails, so this would AttributeError on Python 3 at that point.
    assert fun is npsum, "Cannot use " + fun.func_name + \
        ", only sum has been implemented"
    # Inputs must form a gap-free time sequence (helper defined elsewhere).
    assert _is_consecutive(ipws)

    ipw0 = ipws[0]

    start_datetime = ipw0.start_datetime

    # One index entry per input IPW, spaced at the inputs' native frequency.
    idx = date_range(start=start_datetime, periods=len(ipws), freq=freq)

    series = Series(map(lambda ipw: ipw.data_frame(), ipws), index=idx)

    # NOTE(review): legacy pandas API — `how=` was removed in later pandas
    # versions (modern spelling: `series.resample(rule).sum()`).
    resampled = series.resample(rule, how=npsum)
    resampled_idx = resampled.index

    # Width of one output interval; used to stamp each IPW's end time.
    resampled_dt = resampled_idx[1] - resampled_idx[0]

    resampled_ipws = [IPW() for el in resampled]

    header_dict = deepcopy(ipw0.header_dict)
    file_type = ipw0.file_type
    # Bands/geotransform are shared across outputs here and deep-copied
    # per-IPW in the loop below.
    # bands = deepcopy(ipw0.bands)
    bands = ipw0.bands
    # nonglobal_bands = deepcopy(ipw0.nonglobal_bands)
    nonglobal_bands = ipw0.nonglobal_bands
    geotransform = ipw0.geotransform

    for ipw_idx, ipw in enumerate(resampled_ipws):

        ipw._data_frame = resampled[ipw_idx]
        ipw.start_datetime = resampled_idx[ipw_idx]
        ipw.end_datetime = resampled_idx[ipw_idx] + resampled_dt
        ipw.header_dict = deepcopy(header_dict)
        ipw.file_type = file_type
        ipw.bands = deepcopy(bands)
        ipw.nonglobal_bands = deepcopy(nonglobal_bands)
        ipw.geotransform = geotransform

        # Rebuild the header so metadata matches the new time span/data.
        ipw.recalculate_header()

    return resampled_ipws
def test_resample_basic(self): # GH3609 s = Series( range(100), index=date_range("20130101", freq="s", periods=100, name="idx"), dtype="float", ) s[10:30] = np.nan index = PeriodIndex( [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")], name="idx", ) expected = Series([34.5, 79.5], index=index) result = s.to_period().resample("T", kind="period").mean() tm.assert_series_equal(result, expected) result2 = s.resample("T", kind="period").mean() tm.assert_series_equal(result2, expected)
def test_rolling_max_gh6297():
    """Replicate result expected in GH #6297"""
    stamps = [datetime(1975, 1, day) for day in range(1, 6)]
    # One of the days carries two datapoints
    stamps.append(datetime(1975, 1, 3, 6, 0))

    # Float values, sorted chronologically
    ser = Series([float(v) for v in range(1, 7)], index=stamps).sort_index()

    want = Series(
        [1.0, 2.0, 6.0, 4.0, 5.0],
        index=DatetimeIndex([datetime(1975, 1, day, 0) for day in range(1, 6)],
                            freq="D"),
    )
    got = ser.resample("D").max().rolling(window=1).max()
    tm.assert_series_equal(want, got)
def wind(wind: pd.Series, ts: float):
    """
    Wind is assumed constant throughout the day

    Parameters
    ----------
    wind:
        Daily timeseries of wind
    ts:
        Timestep to disaggregate down to, in minutes (formatted into a
        pandas ``'T'`` minute offset below)

    Returns
    -------
    wind:
        A sub-daily timeseries of wind, each daily value carried forward
        across its day's sub-steps
    """
    # Resampler.fillna(method='ffill') is deprecated in modern pandas;
    # .ffill() is the documented equivalent.
    return wind.resample('{:0.0f}T'.format(ts)).ffill()