def test_upsample_daily_business_daily(self, simple_period_range_series): ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B') result = ts.resample('D').asfreq() expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000')) assert_series_equal(result, expected) ts = simple_period_range_series('1/1/2000', '2/1/2000') result = ts.resample('H', convention='s').asfreq() exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H') expected = ts.asfreq('H', how='s').reindex(exp_rng) assert_series_equal(result, expected)
def test_upsampling_ohlc(self, freq, period_mult, kind): # GH 13083 pi = period_range(start='2000', freq='D', periods=10) s = Series(range(len(pi)), index=pi) expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) # timestamp-based resampling doesn't include all sub-periods # of the last original period, so extend accordingly: new_index = period_range(start='2000', freq=freq, periods=period_mult * len(pi)) expected = expected.reindex(new_index) result = s.resample(freq, kind=kind).ohlc() assert_frame_equal(result, expected)
def create_series(self):
    """Return a Series named 'pi' counting 0..n-1 over daily periods 2005-01-01 to 2005-01-10."""
    # TODO: replace calls to .create_series() by injecting the series
    # fixture
    first_day = datetime(2005, 1, 1)
    last_day = datetime(2005, 1, 10)
    index = period_range(first_day, last_day, freq='D')
    values = np.arange(len(index))
    return Series(values, index=index, name='pi')
def test_all_values_single_bin(self): # 2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") s = Series(np.random.randn(len(index)), index=index) result = s.resample("A").mean() tm.assert_almost_equal(result[0], s.mean())
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
    """Resampling with ``loffset`` must yield a DatetimeIndex shifted by the offset.

    ``frame`` is the input DataFrame, ``kind`` is forwarded to ``resample`` and
    ``agg_arg`` is the aggregation passed to ``agg`` / ``how``.
    """
    # make sure passing loffset returns DatetimeIndex in all cases
    # basic method taken from Base.test_resample_loffset_arg_type()
    df = frame
    # Mean of every consecutive pair of rows.
    expected_means = [
        df.values[i:i + 2].mean() for i in range(0, len(df.values), 2)
    ]
    # Floor division keeps ``periods`` an integer (true division passed a float).
    expected_index = period_range(
        df.index[0], periods=len(df.index) // 2, freq='2D')

    # loffset coerces PeriodIndex to DateTimeIndex
    expected_index = expected_index.to_timestamp()
    expected_index += timedelta(hours=2)
    expected = DataFrame({'value': expected_means}, index=expected_index)

    result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result_how = df.resample('2D', how=agg_arg, loffset='2H', kind=kind)
    if isinstance(agg_arg, list):
        # A list aggregation produces two-level columns.
        expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
    assert_frame_equal(result_agg, expected)
    assert_frame_equal(result_how, expected)
def test_quarterly_resampling(self): rng = period_range('2000Q1', periods=10, freq='Q-DEC') ts = Series(np.arange(10), index=rng) result = ts.resample('A').mean() exp = ts.to_timestamp().resample('A').mean().to_period() assert_series_equal(result, exp)
def test_all_values_single_bin(self): # 2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") s = Series(np.random.randn(len(index)), index=index) result = s.resample("A").mean() tm.assert_almost_equal(result[0], s.mean())
def test_annual_upsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC') df = DataFrame({'a': ts}) rdf = df.resample('D').ffill() exp = df['a'].resample('D').ffill() assert_series_equal(rdf['a'], exp) rng = period_range('2000', '2003', freq='A-DEC') ts = Series([1, 2, 3, 4], index=rng) result = ts.resample('M').ffill() ex_index = period_range('2000-01', '2003-12', freq='M') expected = ts.asfreq('M', how='start').reindex(ex_index, method='ffill') assert_series_equal(result, expected)
def test_resample_to_quarterly(self, simple_period_range_series):
    """Forward-filled annual -> quarterly upsampling for each anchor month in MONTHS."""
    for month in MONTHS:
        annual_freq = 'A-%s' % month
        quarterly_freq = 'Q-%s' % month
        annual = simple_period_range_series('1990', '1992', freq=annual_freq)
        upsampled = annual.resample(quarterly_freq).ffill()

        # Build the expectation on daily timestamps, then relabel with quarters.
        stamps = annual.to_timestamp('D', how='start')
        first_day = annual.index[0].asfreq('D', 'start')
        last_day = annual.index[-1].asfreq('D', 'end')
        qdates = period_range(first_day, last_day, freq=quarterly_freq)
        expected = stamps.reindex(qdates.to_timestamp('D', 's'), method='ffill')
        expected.index = qdates

        assert_series_equal(upsampled, expected)

    # conforms, but different month
    annual = simple_period_range_series('1990', '1992', freq='A-JUN')
    for how in ('start', 'end'):
        upsampled = annual.resample('Q-MAR', convention=how).ffill()
        expected = annual.asfreq('Q-MAR', how=how).reindex(
            upsampled.index, method='ffill')

        # .to_timestamp('D')
        # expected = expected.resample('Q-MAR').ffill()

        assert_series_equal(upsampled, expected)
def test_to_period(self):
    """to_period on Series and DataFrame, with and without an index frequency."""
    from pandas.core.indexes.period import period_range

    stamps = _simple_ts('1/1/2000', '1/1/2001')

    as_periods = stamps.to_period()
    exp = stamps.copy()
    exp.index = period_range('1/1/2000', '1/1/2001')
    assert_series_equal(as_periods, exp)

    as_periods = stamps.to_period('M')
    exp.index = exp.index.asfreq('M')
    tm.assert_index_equal(as_periods.index, exp.index.asfreq('M'))
    assert_series_equal(as_periods, exp)

    # GH 7606 without freq
    dates = ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04']
    idx = DatetimeIndex(dates)
    exp_idx = pd.PeriodIndex(dates, freq='D')

    s = Series(np.random.randn(4), index=idx)
    expected = s.copy()
    expected.index = exp_idx
    assert_series_equal(s.to_period(), expected)

    df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
    # Convert along the rows ...
    expected = df.copy()
    expected.index = exp_idx
    assert_frame_equal(df.to_period(), expected)

    # ... and along the columns.
    expected = df.copy()
    expected.columns = exp_idx
    assert_frame_equal(df.to_period(axis=1), expected)
def test_frame_to_period(self):
    """DataFrame.to_period along both axes; an invalid axis raises ValueError."""
    from pandas.core.indexes.period import period_range

    K = 5

    stamps = date_range('1/1/2000', '1/1/2001')
    periods = period_range('1/1/2000', '1/1/2001')
    df = DataFrame(randn(len(stamps), K), index=stamps)
    df['mix'] = 'a'

    # Row axis.
    converted = df.to_period()
    exp = df.copy()
    exp.index = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M')
    tm.assert_index_equal(converted.index, exp.index.asfreq('M'))

    # Column axis, on the transposed frame.
    df = df.T
    converted = df.to_period(axis=1)
    exp = df.copy()
    exp.columns = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M', axis=1)
    tm.assert_index_equal(converted.columns, exp.columns.asfreq('M'))

    with pytest.raises(ValueError):
        df.to_period(axis=2)
def test_resample_to_quarterly(self, simple_period_range_series):
    """Forward-filled annual -> quarterly upsampling for each anchor month in MONTHS."""
    for month in MONTHS:
        annual_freq = f"A-{month}"
        quarterly_freq = f"Q-{month}"
        annual = simple_period_range_series("1990", "1992", freq=annual_freq)
        upsampled = annual.resample(quarterly_freq).ffill()

        # Build the expectation on daily timestamps, then relabel with quarters.
        stamps = annual.to_timestamp("D", how="start")
        first_day = annual.index[0].asfreq("D", "start")
        last_day = annual.index[-1].asfreq("D", "end")
        qdates = period_range(first_day, last_day, freq=quarterly_freq)
        expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
        expected.index = qdates

        tm.assert_series_equal(upsampled, expected)

    # conforms, but different month
    annual = simple_period_range_series("1990", "1992", freq="A-JUN")
    for how in ("start", "end"):
        upsampled = annual.resample("Q-MAR", convention=how).ffill()
        expected = annual.asfreq("Q-MAR", how=how).reindex(
            upsampled.index, method="ffill"
        )

        # .to_timestamp('D')
        # expected = expected.resample('Q-MAR').ffill()

        tm.assert_series_equal(upsampled, expected)
def test_quarterly_resampling(self): rng = period_range('2000Q1', periods=10, freq='Q-DEC') ts = Series(np.arange(10), index=rng) result = ts.resample('A').mean() exp = ts.to_timestamp().resample('A').mean().to_period() assert_series_equal(result, exp)
def test_mixed_freq_irreg_period(self):
    """Plot an irregularly spaced series, then a business-day period series."""
    ts = tm.makeTimeSeries()
    # Pick rows by position explicitly with .iloc. Passing integers to [] only
    # works through the deprecated positional fallback
    # (presumably ts is datetime-indexed -- confirm against tm.makeTimeSeries).
    irreg = ts.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]]
    rng = period_range('1/3/2000', periods=30, freq='B')
    ps = Series(np.random.randn(len(rng)), rng)
    irreg.plot()
    ps.plot()
def test_to_period(self):
    """to_period on Series and DataFrame, with and without an index frequency."""
    from pandas.core.indexes.period import period_range

    stamps = _simple_ts("1/1/2000", "1/1/2001")

    as_periods = stamps.to_period()
    exp = stamps.copy()
    exp.index = period_range("1/1/2000", "1/1/2001")
    tm.assert_series_equal(as_periods, exp)

    as_periods = stamps.to_period("M")
    exp.index = exp.index.asfreq("M")
    tm.assert_index_equal(as_periods.index, exp.index.asfreq("M"))
    tm.assert_series_equal(as_periods, exp)

    # GH 7606 without freq
    dates = ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]
    idx = DatetimeIndex(dates)
    exp_idx = pd.PeriodIndex(dates, freq="D")

    s = Series(np.random.randn(4), index=idx)
    expected = s.copy()
    expected.index = exp_idx
    tm.assert_series_equal(s.to_period(), expected)

    df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
    # Convert along the rows ...
    expected = df.copy()
    expected.index = exp_idx
    tm.assert_frame_equal(df.to_period(), expected)

    # ... and along the columns.
    expected = df.copy()
    expected.columns = exp_idx
    tm.assert_frame_equal(df.to_period(axis=1), expected)
def test_frame_to_period(self):
    """DataFrame.to_period along both axes; an invalid axis raises ValueError."""
    from pandas.core.indexes.period import period_range

    K = 5

    stamps = date_range('1/1/2000', '1/1/2001')
    periods = period_range('1/1/2000', '1/1/2001')
    df = DataFrame(randn(len(stamps), K), index=stamps)
    df['mix'] = 'a'

    # Row axis.
    converted = df.to_period()
    exp = df.copy()
    exp.index = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M')
    tm.assert_index_equal(converted.index, exp.index.asfreq('M'))

    # Column axis, on the transposed frame.
    df = df.T
    converted = df.to_period(axis=1)
    exp = df.copy()
    exp.columns = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M', axis=1)
    tm.assert_index_equal(converted.columns, exp.columns.asfreq('M'))

    with pytest.raises(ValueError):
        df.to_period(axis=2)
def test_annual_upsample(self, simple_period_range_series): ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC") df = DataFrame({"a": ts}) rdf = df.resample("D").ffill() exp = df["a"].resample("D").ffill() tm.assert_series_equal(rdf["a"], exp) rng = period_range("2000", "2003", freq="A-DEC") ts = Series([1, 2, 3, 4], index=rng) result = ts.resample("M").ffill() ex_index = period_range("2000-01", "2003-12", freq="M") expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill") tm.assert_series_equal(result, expected)
def test_to_period(self):
    """to_period on Series and DataFrame, with and without an index frequency."""
    from pandas.core.indexes.period import period_range

    stamps = _simple_ts('1/1/2000', '1/1/2001')

    as_periods = stamps.to_period()
    exp = stamps.copy()
    exp.index = period_range('1/1/2000', '1/1/2001')
    assert_series_equal(as_periods, exp)

    as_periods = stamps.to_period('M')
    exp.index = exp.index.asfreq('M')
    tm.assert_index_equal(as_periods.index, exp.index.asfreq('M'))
    assert_series_equal(as_periods, exp)

    # GH 7606 without freq
    dates = ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04']
    idx = DatetimeIndex(dates)
    exp_idx = pd.PeriodIndex(dates, freq='D')

    s = Series(np.random.randn(4), index=idx)
    expected = s.copy()
    expected.index = exp_idx
    assert_series_equal(s.to_period(), expected)

    df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
    # Convert along the rows ...
    expected = df.copy()
    expected.index = exp_idx
    assert_frame_equal(df.to_period(), expected)

    # ... and along the columns.
    expected = df.copy()
    expected.columns = exp_idx
    assert_frame_equal(df.to_period(axis=1), expected)
def test_resample_to_quarterly(self, simple_period_range_series):
    """Forward-filled annual -> quarterly upsampling for each anchor month in MONTHS."""
    for month in MONTHS:
        annual_freq = 'A-%s' % month
        quarterly_freq = 'Q-%s' % month
        annual = simple_period_range_series('1990', '1992', freq=annual_freq)
        upsampled = annual.resample(quarterly_freq).ffill()

        # Build the expectation on daily timestamps, then relabel with quarters.
        stamps = annual.to_timestamp('D', how='start')
        first_day = annual.index[0].asfreq('D', 'start')
        last_day = annual.index[-1].asfreq('D', 'end')
        qdates = period_range(first_day, last_day, freq=quarterly_freq)
        expected = stamps.reindex(qdates.to_timestamp('D', 's'), method='ffill')
        expected.index = qdates

        assert_series_equal(upsampled, expected)

    # conforms, but different month
    annual = simple_period_range_series('1990', '1992', freq='A-JUN')
    for how in ('start', 'end'):
        upsampled = annual.resample('Q-MAR', convention=how).ffill()
        expected = annual.asfreq('Q-MAR', how=how).reindex(
            upsampled.index, method='ffill')

        # .to_timestamp('D')
        # expected = expected.resample('Q-MAR').ffill()

        assert_series_equal(upsampled, expected)
def test_annual_upsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC') df = DataFrame({'a': ts}) rdf = df.resample('D').ffill() exp = df['a'].resample('D').ffill() assert_series_equal(rdf['a'], exp) rng = period_range('2000', '2003', freq='A-DEC') ts = Series([1, 2, 3, 4], index=rng) result = ts.resample('M').ffill() ex_index = period_range('2000-01', '2003-12', freq='M') expected = ts.asfreq('M', how='start').reindex(ex_index, method='ffill') assert_series_equal(result, expected)
def test_resample_tz_localized(self):
    """Resampling of tz-aware series and frames (weekly, daily, period, annual)."""
    local_tz = "America/Los_Angeles"
    days = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(range(len(days)), index=days)

    ts_local = ts.tz_localize("UTC").tz_convert(local_tz)

    result = ts_local.resample("W").mean()

    # Same data with the tz stripped, resampled and then re-localized.
    ts_local_naive = ts_local.copy()
    naive_stamps = []
    for stamp in ts_local_naive.index.to_pydatetime():
        naive_stamps.append(stamp.replace(tzinfo=None))
    ts_local_naive.index = naive_stamps

    exp = ts_local_naive.resample("W").mean().tz_localize(local_tz)

    tm.assert_series_equal(result, exp)

    # it works
    result = ts_local.resample("D").mean()

    # #2245
    sydney = "Australia/Sydney"
    idx = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz=sydney)
    s = Series([1, 2], index=idx)

    result = s.resample("D", closed="right", label="right").mean()
    ex_index = date_range("2001-09-21", periods=1, freq="D", tz=sydney)
    tm.assert_series_equal(result, Series([1.5], index=ex_index))

    # for good measure
    result = s.resample("D", kind="period").mean()
    ex_index = period_range("2001-09-20", periods=1, freq="D")
    tm.assert_series_equal(result, Series([1.5], index=ex_index))

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    rng = date_range("1/1/2011", periods=20000, freq="H").tz_localize("EST")
    ts = DataFrame(index=rng)
    ts["first"] = np.random.randn(len(rng))
    ts["second"] = np.cumsum(np.random.randn(len(rng)))
    columns = ["first", "second"]
    first_sum = ts.resample("A").sum()["first"]
    second_mean = ts.resample("A").mean()["second"]
    expected = DataFrame({"first": first_sum, "second": second_mean}, columns=columns)
    aggregated = ts.resample("A").agg({"first": np.sum, "second": np.mean})
    result = aggregated.reindex(columns=columns)
    tm.assert_frame_equal(result, expected)
def test_resample_5minute(self, freq, kind): rng = period_range("1/1/2000", "1/5/2000", freq="T") ts = Series(np.random.randn(len(rng)), index=rng) expected = ts.to_timestamp().resample(freq).mean() if kind != "timestamp": expected = expected.to_period(freq) result = ts.resample(freq, kind=kind).mean() assert_series_equal(result, expected)
def test_upsample_with_limit(self): rng = period_range('1/1/2000', periods=5, freq='A') ts = Series(np.random.randn(len(rng)), rng) result = ts.resample('M', convention='end').ffill(limit=2) expected = ts.asfreq('M').reindex(result.index, method='ffill', limit=2) assert_series_equal(result, expected)
def test_finder_monthly_long(self):
    """First major x tick of a 24-year monthly period plot is the ordinal of 1989Q1."""
    rng = period_range('1988Q1', periods=24 * 12, freq='M')
    ser = Series(np.random.randn(len(rng)), rng)
    ax = ser.plot()
    xaxis = ax.get_xaxis()
    rs = xaxis.get_majorticklocs()[0]
    xp = Period('1989Q1', 'M').ordinal
    # Plain assert instead of unittest's self.assertEqual, so the test does
    # not depend on the class deriving from unittest.TestCase.
    assert rs == xp
def test_resample_5minute(self, freq, kind): rng = period_range('1/1/2000', '1/5/2000', freq='T') ts = Series(np.random.randn(len(rng)), index=rng) expected = ts.to_timestamp().resample(freq).mean() if kind != 'timestamp': expected = expected.to_period(freq) result = ts.resample(freq, kind=kind).mean() assert_series_equal(result, expected)
def test_finder_monthly_long(self):
    """First major x tick of a 24-year monthly period plot is the ordinal of 1989Q1."""
    months = period_range('1988Q1', periods=24 * 12, freq='M')
    series = Series(np.random.randn(len(months)), months)
    axes = series.plot()
    first_major_tick = axes.get_xaxis().get_majorticklocs()[0]
    expected_ordinal = Period('1989Q1', 'M').ordinal
    assert first_major_tick == expected_ordinal
def test_resample_incompat_freq(self):
    """Resampling monthly periods to weekly raises IncompatibleFrequency."""
    msg = (
        "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
        "as they are not sub or super periods"
    )
    monthly = period_range(start="2000", periods=3, freq="M")
    series = Series(range(3), index=monthly)
    with pytest.raises(IncompatibleFrequency, match=msg):
        series.resample("W").mean()
def test_resample_same_freq(self, resample_method): # GH12770 series = Series(range(3), index=period_range(start="2000", periods=3, freq="M")) expected = series result = getattr(series.resample("M"), resample_method)() tm.assert_series_equal(result, expected)
def test_upsample_with_limit(self): rng = period_range('1/1/2000', periods=5, freq='A') ts = Series(np.random.randn(len(rng)), rng) result = ts.resample('M', convention='end').ffill(limit=2) expected = ts.asfreq('M').reindex(result.index, method='ffill', limit=2) assert_series_equal(result, expected)
def test_resample_to_period_monthly_buglet():
    """Monthly period resample of a daily year 2000 series is indexed Jan-2000..Dec-2000."""
    # GH #1259
    days = date_range('1/1/2000', '12/31/2000')
    daily = Series(np.random.randn(len(days)), index=days)
    monthly_means = daily.resample('M', kind='period').mean()
    months = period_range('Jan-2000', 'Dec-2000', freq='M')
    tm.assert_index_equal(monthly_means.index, months)
def test_resample_to_period_monthly_buglet():
    """Monthly period resample of a daily year 2000 series is indexed Jan-2000..Dec-2000."""
    # GH #1259
    days = date_range('1/1/2000', '12/31/2000')
    daily = Series(np.random.randn(len(days)), index=days)
    monthly_means = daily.resample('M', kind='period').mean()
    months = period_range('Jan-2000', 'Dec-2000', freq='M')
    tm.assert_index_equal(monthly_means.index, months)
def test_upsample_with_limit(self): rng = period_range("1/1/2000", periods=5, freq="A") ts = Series(np.random.randn(len(rng)), rng) result = ts.resample("M", convention="end").ffill(limit=2) expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) assert_series_equal(result, expected)
def test_resample_to_period_monthly_buglet():
    """Monthly period resample of a daily year 2000 series is indexed Jan-2000..Dec-2000."""
    # GH #1259
    days = date_range("1/1/2000", "12/31/2000")
    daily = Series(np.random.randn(len(days)), index=days)
    monthly_means = daily.resample("M", kind="period").mean()
    months = period_range("Jan-2000", "Dec-2000", freq="M")
    tm.assert_index_equal(monthly_means.index, months)
def test_resample_with_nat(self, periods, values, freq, expected_values): # GH 13224 index = PeriodIndex(periods, freq='S') frame = DataFrame(values, index=index) expected_index = period_range('1970-01-01 00:00:00', periods=len(expected_values), freq=freq) expected = DataFrame(expected_values, index=expected_index) result = frame.resample(freq).mean() assert_frame_equal(result, expected)
def test_resample_with_nat(self, periods, values, freq, expected_values): # GH 13224 index = PeriodIndex(periods, freq='S') frame = DataFrame(values, index=index) expected_index = period_range('1970-01-01 00:00:00', periods=len(expected_values), freq=freq) expected = DataFrame(expected_values, index=expected_index) result = frame.resample(freq).mean() assert_frame_equal(result, expected)
def test_resample_tz_localized(self):
    """Resampling of tz-aware series and frames (weekly, daily, period, annual)."""
    dr = date_range(start='2012-4-13', end='2012-5-1')
    # A plain range is enough here; no Python-2 compat list helper is needed.
    ts = Series(range(len(dr)), dr)

    ts_utc = ts.tz_localize('UTC')
    ts_local = ts_utc.tz_convert('America/Los_Angeles')

    result = ts_local.resample('W').mean()

    # Same data with the tz stripped, resampled and then re-localized.
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [x.replace(tzinfo=None)
                            for x in ts_local_naive.index.to_pydatetime()]

    exp = ts_local_naive.resample(
        'W').mean().tz_localize('America/Los_Angeles')

    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample('D').mean()

    # #2245
    idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                     tz='Australia/Sydney')
    s = Series([1, 2], index=idx)

    result = s.resample('D', closed='right', label='right').mean()
    ex_index = date_range('2001-09-21', periods=1, freq='D',
                          tz='Australia/Sydney')
    expected = Series([1.5], index=ex_index)

    assert_series_equal(result, expected)

    # for good measure
    result = s.resample('D', kind='period').mean()
    ex_index = period_range('2001-09-20', periods=1, freq='D')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    rng = date_range('1/1/2011', periods=20000, freq='H')
    rng = rng.tz_localize('EST')
    ts = DataFrame(index=rng)
    ts['first'] = np.random.randn(len(rng))
    ts['second'] = np.cumsum(np.random.randn(len(rng)))
    expected = DataFrame(
        {
            'first': ts.resample('A').sum()['first'],
            'second': ts.resample('A').mean()['second']},
        columns=['first', 'second'])
    result = ts.resample(
        'A').agg({'first': np.sum, 'second': np.mean}).reindex(
        columns=['first', 'second'])
    assert_frame_equal(result, expected)
def test_resample_tz_localized(self):
    """Resampling of tz-aware series and frames (weekly, daily, period, annual)."""
    dr = date_range(start='2012-4-13', end='2012-5-1')
    # A plain range is enough here; no Python-2 compat list helper is needed.
    ts = Series(range(len(dr)), dr)

    ts_utc = ts.tz_localize('UTC')
    ts_local = ts_utc.tz_convert('America/Los_Angeles')

    result = ts_local.resample('W').mean()

    # Same data with the tz stripped, resampled and then re-localized.
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [x.replace(tzinfo=None)
                            for x in ts_local_naive.index.to_pydatetime()]

    exp = ts_local_naive.resample(
        'W').mean().tz_localize('America/Los_Angeles')

    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample('D').mean()

    # #2245
    idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                     tz='Australia/Sydney')
    s = Series([1, 2], index=idx)

    result = s.resample('D', closed='right', label='right').mean()
    ex_index = date_range('2001-09-21', periods=1, freq='D',
                          tz='Australia/Sydney')
    expected = Series([1.5], index=ex_index)

    assert_series_equal(result, expected)

    # for good measure
    result = s.resample('D', kind='period').mean()
    ex_index = period_range('2001-09-20', periods=1, freq='D')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    rng = date_range('1/1/2011', periods=20000, freq='H')
    rng = rng.tz_localize('EST')
    ts = DataFrame(index=rng)
    ts['first'] = np.random.randn(len(rng))
    ts['second'] = np.cumsum(np.random.randn(len(rng)))
    expected = DataFrame(
        {
            'first': ts.resample('A').sum()['first'],
            'second': ts.resample('A').mean()['second']},
        columns=['first', 'second'])
    result = ts.resample(
        'A').agg({'first': np.sum, 'second': np.mean}).reindex(
        columns=['first', 'second'])
    assert_frame_equal(result, expected)
def test_finder_annual(self):
    """Check the first major x tick of annual-period plots of several lengths."""
    import matplotlib.pyplot as plt

    # Expected year of the first major tick for each series length below.
    xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170]
    for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]):
        rng = period_range('1987', periods=nyears, freq='A')
        ser = Series(np.random.randn(len(rng)), rng)
        ax = ser.plot()
        try:
            xaxis = ax.get_xaxis()
            rs = xaxis.get_majorticklocs()[0]
            # Plain assert instead of unittest's self.assertEqual, so the test
            # does not depend on the class deriving from unittest.TestCase.
            assert rs == Period(xp[i], freq='A').ordinal
        finally:
            # Close the figure even when the assertion fails.
            plt.close(ax.get_figure())
def test_basic_downsample(self, simple_period_range_series): ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") result = ts.resample("a-dec").mean() expected = ts.groupby(ts.index.year).mean() expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec") assert_series_equal(result, expected) # this is ok assert_series_equal(ts.resample("a-dec").mean(), result) assert_series_equal(ts.resample("a").mean(), result)
def test_basic_downsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() expected = ts.groupby(ts.index.year).mean() expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec') assert_series_equal(result, expected) # this is ok assert_series_equal(ts.resample('a-dec').mean(), result) assert_series_equal(ts.resample('a').mean(), result)
def test_finder_annual(self):
    """Check the first major x tick of annual-period plots of several lengths."""
    # Expected year of the first major tick, paired with each series length.
    first_tick_years = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170]
    series_lengths = [5, 10, 19, 49, 99, 199, 599, 1001]
    for tick_year, nyears in zip(first_tick_years, series_lengths):
        years = period_range('1987', periods=nyears, freq='A')
        series = Series(np.random.randn(len(years)), years)
        _, ax = self.plt.subplots()
        series.plot(ax=ax)
        first_major_tick = ax.get_xaxis().get_majorticklocs()[0]
        assert first_major_tick == Period(tick_year, freq='A').ordinal
        self.plt.close(ax.get_figure())
def test_finder_annual(self):
    """Check the first major x tick of annual-period plots of several lengths."""
    import matplotlib.pyplot as plt

    # Expected year of the first major tick for each series length below.
    xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170]
    for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]):
        rng = period_range('1987', periods=nyears, freq='A')
        ser = Series(np.random.randn(len(rng)), rng)
        ax = ser.plot()
        try:
            xaxis = ax.get_xaxis()
            rs = xaxis.get_majorticklocs()[0]
            # Plain assert instead of unittest's self.assertEqual, so the test
            # does not depend on the class deriving from unittest.TestCase.
            assert rs == Period(xp[i], freq='A').ordinal
        finally:
            # Close the figure even when the assertion fails.
            plt.close(ax.get_figure())
def test_basic_downsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() expected = ts.groupby(ts.index.year).mean() expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec') assert_series_equal(result, expected) # this is ok assert_series_equal(ts.resample('a-dec').mean(), result) assert_series_equal(ts.resample('a').mean(), result)
def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): # GH 23882 & 31809 s = Series(0, index=period_range(start, end, freq=start_freq)) s = s + np.arange(len(s)) result = s.resample(end_freq, offset=offset).mean() result = result.to_timestamp(end_freq) expected = s.to_timestamp().resample(end_freq, offset=offset).mean() if end_freq == "M": # TODO: is non-tick the relevant characteristic? (GH 33815) expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected)
def _get_new_index(self): """ return our new index """ ax = self.ax if len(ax) == 0: values = [] else: start = ax[0].asfreq(self.freq, how=self.convention) end = ax[-1].asfreq(self.freq, how='end') values = period_range(start, end, freq=self.freq).asi8 return ax._shallow_copy(values, freq=self.freq)
def _get_new_index(self): """ return our new index """ ax = self.ax if len(ax) == 0: values = [] else: start = ax[0].asfreq(self.freq, how=self.convention) end = ax[-1].asfreq(self.freq, how='end') values = period_range(start, end, freq=self.freq).asi8 return ax._shallow_copy(values, freq=self.freq)
def test_resample_timestamp_to_period(simple_date_range_series): ts = simple_date_range_series('1/1/1990', '1/1/2000') result = ts.resample('A-DEC', kind='period').mean() expected = ts.resample('A-DEC').mean() expected.index = period_range('1990', '2000', freq='a-dec') assert_series_equal(result, expected) result = ts.resample('A-JUN', kind='period').mean() expected = ts.resample('A-JUN').mean() expected.index = period_range('1990', '2000', freq='a-jun') assert_series_equal(result, expected) result = ts.resample('M', kind='period').mean() expected = ts.resample('M').mean() expected.index = period_range('1990-01', '2000-01', freq='M') assert_series_equal(result, expected) result = ts.resample('M', kind='period').mean() expected = ts.resample('M').mean() expected.index = period_range('1990-01', '2000-01', freq='M') assert_series_equal(result, expected)
def test_resample_timestamp_to_period(simple_date_range_series): ts = simple_date_range_series('1/1/1990', '1/1/2000') result = ts.resample('A-DEC', kind='period').mean() expected = ts.resample('A-DEC').mean() expected.index = period_range('1990', '2000', freq='a-dec') assert_series_equal(result, expected) result = ts.resample('A-JUN', kind='period').mean() expected = ts.resample('A-JUN').mean() expected.index = period_range('1990', '2000', freq='a-jun') assert_series_equal(result, expected) result = ts.resample('M', kind='period').mean() expected = ts.resample('M').mean() expected.index = period_range('1990-01', '2000-01', freq='M') assert_series_equal(result, expected) result = ts.resample('M', kind='period').mean() expected = ts.resample('M').mean() expected.index = period_range('1990-01', '2000-01', freq='M') assert_series_equal(result, expected)
def test_resample_timestamp_to_period(simple_date_range_series): ts = simple_date_range_series("1/1/1990", "1/1/2000") result = ts.resample("A-DEC", kind="period").mean() expected = ts.resample("A-DEC").mean() expected.index = period_range("1990", "2000", freq="a-dec") tm.assert_series_equal(result, expected) result = ts.resample("A-JUN", kind="period").mean() expected = ts.resample("A-JUN").mean() expected.index = period_range("1990", "2000", freq="a-jun") tm.assert_series_equal(result, expected) result = ts.resample("M", kind="period").mean() expected = ts.resample("M").mean() expected.index = period_range("1990-01", "2000-01", freq="M") tm.assert_series_equal(result, expected) result = ts.resample("M", kind="period").mean() expected = ts.resample("M").mean() expected.index = period_range("1990-01", "2000-01", freq="M") tm.assert_series_equal(result, expected)
def test_resample_to_quarterly(self, simple_period_range_series, month): ts = simple_period_range_series("1990", "1992", freq=f"A-{month}") quar_ts = ts.resample(f"Q-{month}").ffill() stamps = ts.to_timestamp("D", how="start") qdates = period_range( ts.index[0].asfreq("D", "start"), ts.index[-1].asfreq("D", "end"), freq=f"Q-{month}", ) expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill") expected.index = qdates tm.assert_series_equal(quar_ts, expected)
def test_finder_quarterly(self):
    """First major x tick of quarterly plots is 1988Q1, also after moving the left limit."""
    import matplotlib.pyplot as plt

    expected_ordinal = Period('1988Q1').ordinal
    for n_years in [3.5, 11]:
        quarters = period_range('1987Q2', periods=int(n_years * 4), freq='Q')
        series = Series(np.random.randn(len(quarters)), quarters)
        ax = series.plot()
        xaxis = ax.get_xaxis()
        assert xaxis.get_majorticklocs()[0] == expected_ordinal

        # Move the left limit by 0.9 and check the first tick again.
        left, right = ax.get_xlim()
        ax.set_xlim(left + 0.9, right)
        assert expected_ordinal == xaxis.get_majorticklocs()[0]
        plt.close(ax.get_figure())
def setup_method(self, method):
    """Build random Series/DataFrame fixtures over period and datetime indexes.

    Each index has 100 entries starting at 12/31/1999, one index per frequency.
    """
    TestPlotBase.setup_method(self, method)

    columns = ['A', 'B', 'C']

    period_freqs = ['S', 'T', 'H', 'D', 'W', 'M', 'Q', 'A']
    period_indexes = [
        period_range('12/31/1999', freq=f, periods=100) for f in period_freqs
    ]
    self.period_ser = [
        Series(np.random.randn(len(index)), index) for index in period_indexes
    ]
    self.period_df = [
        DataFrame(np.random.randn(len(index), 3), index=index, columns=columns)
        for index in period_indexes
    ]

    datetime_freqs = ['S', 'T', 'H', 'D', 'W', 'M', 'Q-DEC', 'A', '1B30Min']
    datetime_indexes = [
        date_range('12/31/1999', freq=f, periods=100) for f in datetime_freqs
    ]
    self.datetime_ser = [
        Series(np.random.randn(len(index)), index) for index in datetime_indexes
    ]
    self.datetime_df = [
        DataFrame(np.random.randn(len(index), 3), index=index, columns=columns)
        for index in datetime_indexes
    ]
def test_finder_monthly(self):
    """First major x tick of monthly plots is Jan 1988, also after moving the left limit."""
    expected_ordinal = Period('Jan 1988').ordinal
    for n_years in [1.15, 2.5, 4, 11]:
        months = period_range('1987Q2', periods=int(n_years * 12), freq='M')
        series = Series(np.random.randn(len(months)), months)
        _, ax = self.plt.subplots()
        series.plot(ax=ax)
        xaxis = ax.get_xaxis()
        assert xaxis.get_majorticklocs()[0] == expected_ordinal

        # Move the left limit by 0.9 and check the first tick again.
        left, right = ax.get_xlim()
        ax.set_xlim(left + 0.9, right)
        assert expected_ordinal == xaxis.get_majorticklocs()[0]
        self.plt.close(ax.get_figure())
def test_finder_monthly(self):
    """First major x tick of monthly plots is Jan 1988, also after moving the left limit."""
    import matplotlib.pyplot as plt

    xp = Period('Jan 1988').ordinal
    yrs = [1.15, 2.5, 4, 11]
    for n in yrs:
        rng = period_range('1987Q2', periods=int(n * 12), freq='M')
        ser = Series(np.random.randn(len(rng)), rng)
        ax = ser.plot()
        try:
            xaxis = ax.get_xaxis()
            rs = xaxis.get_majorticklocs()[0]
            # Plain asserts instead of unittest's self.assertEqual, so the test
            # does not depend on the class deriving from unittest.TestCase.
            assert rs == xp
            # Move the left limit by 0.9 and check the first tick again.
            vmin, vmax = ax.get_xlim()
            ax.set_xlim(vmin + 0.9, vmax)
            rs = xaxis.get_majorticklocs()[0]
            assert xp == rs
        finally:
            # Close the figure even when an assertion fails.
            plt.close(ax.get_figure())
def test_finder_monthly(self):
    """First major x tick of monthly plots is Jan 1988, also after moving the left limit."""
    import matplotlib.pyplot as plt

    xp = Period('Jan 1988').ordinal
    yrs = [1.15, 2.5, 4, 11]
    for n in yrs:
        rng = period_range('1987Q2', periods=int(n * 12), freq='M')
        ser = Series(np.random.randn(len(rng)), rng)
        ax = ser.plot()
        try:
            xaxis = ax.get_xaxis()
            rs = xaxis.get_majorticklocs()[0]
            # Plain asserts instead of unittest's self.assertEqual, so the test
            # does not depend on the class deriving from unittest.TestCase.
            assert rs == xp
            # Move the left limit by 0.9 and check the first tick again.
            vmin, vmax = ax.get_xlim()
            ax.set_xlim(vmin + 0.9, vmax)
            rs = xaxis.get_majorticklocs()[0]
            assert xp == rs
        finally:
            # Close the figure even when an assertion fails.
            plt.close(ax.get_figure())
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
    """Resampling with ``loffset`` must yield a DatetimeIndex shifted by the offset.

    ``frame`` is the input DataFrame, ``kind`` is forwarded to ``resample`` and
    ``agg_arg`` is the aggregation passed to ``agg`` / ``how``.
    """
    # make sure passing loffset returns DatetimeIndex in all cases
    # basic method taken from Base.test_resample_loffset_arg_type()
    df = frame
    # Mean of every consecutive pair of rows.
    expected_means = [df.values[i:i + 2].mean()
                      for i in range(0, len(df.values), 2)]
    # Floor division keeps ``periods`` an integer (true division passed a float).
    expected_index = period_range(
        df.index[0], periods=len(df.index) // 2, freq='2D')

    # loffset coerces PeriodIndex to DateTimeIndex
    expected_index = expected_index.to_timestamp()
    expected_index += timedelta(hours=2)
    expected = DataFrame({'value': expected_means}, index=expected_index)

    result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result_how = df.resample('2D', how=agg_arg, loffset='2H', kind=kind)
    if isinstance(agg_arg, list):
        # A list aggregation produces two-level columns.
        expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
    assert_frame_equal(result_agg, expected)
    assert_frame_equal(result_how, expected)
def _daily_finder(vmin, vmax, freq):
    """
    Build tick flags and label formats for a range of period ordinals.

    The interval ``[int(vmin), int(vmax)]`` is expanded into one entry per
    period of frequency ``freq``.  Depending on how many periods the
    interval spans, different entries are flagged and given a format
    string (see the numbered cases below).

    Parameters
    ----------
    vmin, vmax : number
        Interval bounds; each is truncated with ``int()`` and used as a
        period ordinal.  The untruncated ``vmin`` is kept for the label
        helpers.
    freq : frequency code
        Compared against ``FreqGroup`` constants.  Hourly-or-finer,
        business-day, daily and weekly groups are accepted.

    Returns
    -------
    numpy.ndarray
        Structured array with one row per period and the fields
        ``'val'`` (int64, filled from ``dates_._ndarray_values``),
        ``'maj'`` and ``'min'`` (bool flags; presumably major / minor
        ticks -- confirm with the caller) and ``'fmt'`` (``'|S20'``
        format string, empty where no label is set).

    Raises
    ------
    ValueError
        If ``freq`` matches none of the handled frequency groups.
    """
    # Stays -1 unless the frequency is hourly or finer.
    periodsperday = -1

    if freq >= FreqGroup.FR_HR:
        if freq == FreqGroup.FR_NS:
            periodsperday = 24 * 60 * 60 * 1000000000
        elif freq == FreqGroup.FR_US:
            periodsperday = 24 * 60 * 60 * 1000000
        elif freq == FreqGroup.FR_MS:
            periodsperday = 24 * 60 * 60 * 1000
        elif freq == FreqGroup.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == FreqGroup.FR_MIN:
            periodsperday = 24 * 60
        elif freq == FreqGroup.FR_HR:
            periodsperday = 24
        else:  # pragma: no cover
            raise ValueError("unexpected frequency: {freq}".format(freq=freq))
        # Nominal sizes used only as thresholds: 365-day year, 28-day month.
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday

    elif freq == FreqGroup.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == FreqGroup.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif resolution.get_freq_group(freq) == FreqGroup.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    else:  # pragma: no cover
        raise ValueError("unexpected frequency")

    # Keep the untruncated lower bound: first_label() and has_level_label()
    # receive it after vmin has been replaced by a Period.
    vmin_orig = vmin

    (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq),
                    Period(ordinal=int(vmax), freq=freq))
    # Number of periods in the closed interval [vmin, vmax].
    span = vmax.ordinal - vmin.ordinal + 1
    dates_ = period_range(start=vmin, end=vmax, freq=freq)
    # Initialize the output: one record per period.
    info = np.zeros(span,
                    dtype=[('val', np.int64), ('maj', bool),
                           ('min', bool), ('fmt', '|S20')])
    info['val'][:] = dates_._ndarray_values
    info['fmt'][:] = ''
    # The first and last entries are always flagged in 'maj'.
    info['maj'][[0, -1]] = True
    # .. and set some shortcuts (field views, so writes land in ``info``)
    info_maj = info['maj']
    info_min = info['min']
    info_fmt = info['fmt']

    def first_label(label_flags):
        # Pick the position that should carry the "first" label: skip
        # position 0 when another candidate exists and the original lower
        # bound had a fractional part.
        if (label_flags[0] == 0) and (label_flags.size > 1) and \
                ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # Case 1. Less than a month
    if span <= periodspermonth:
        # NOTE(review): period_break presumably returns the positions where
        # the named field changes between consecutive periods -- confirm.
        day_start = period_break(dates_, 'day')
        month_start = period_break(dates_, 'month')

        def _hour_finder(label_interval, force_year_start):
            # Flag day starts in 'maj' and every ``label_interval``-th hour
            # start in 'min'; optionally force a year label on the first
            # day label.
            _hour = dates_.hour
            _prev_hour = (dates_ - 1 * dates_.freq).hour
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            # Later assignments overwrite earlier ones at shared positions,
            # so the most detailed format wins.
            info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y'

        def _minute_finder(label_interval):
            # Flag hour starts in 'maj' and every ``label_interval``-th
            # minute start in 'min'.
            hour_start = period_break(dates_, 'hour')
            _minute = dates_.minute
            _prev_minute = (dates_ - 1 * dates_.freq).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'

        def _second_finder(label_interval):
            # Flag minute starts in 'maj' and every ``label_interval``-th
            # second start in 'min'.
            minute_start = period_break(dates_, 'minute')
            _second = dates_.second
            _prev_second = (dates_ - 1 * dates_.freq).second
            second_start = (_second - _prev_second) != 0
            info['maj'][minute_start] = True
            info['min'][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[second_start & (_second % label_interval == 0)] = '%H:%M:%S'
            info_fmt[day_start] = '%H:%M:%S\n%d-%b'
            info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y'

        # Choose the label granularity from the span expressed as a
        # fraction (or multiple) of a day.  For business-day, daily and
        # weekly frequencies periodsperday is still -1, so with a positive
        # span every comparison is False and the final ``else`` runs.
        if span < periodsperday / 12000.0:
            _second_finder(1)
        elif span < periodsperday / 6000.0:
            _second_finder(2)
        elif span < periodsperday / 2400.0:
            _second_finder(5)
        elif span < periodsperday / 1200.0:
            _second_finder(10)
        elif span < periodsperday / 800.0:
            _second_finder(15)
        elif span < periodsperday / 400.0:
            _second_finder(30)
        elif span < periodsperday / 150.0:
            _minute_finder(1)
        elif span < periodsperday / 70.0:
            _minute_finder(2)
        elif span < periodsperday / 24.0:
            _minute_finder(5)
        elif span < periodsperday / 12.0:
            _minute_finder(15)
        elif span < periodsperday / 6.0:
            _minute_finder(30)
        elif span < periodsperday / 2.5:
            _hour_finder(1, False)
        elif span < periodsperday / 1.5:
            _hour_finder(2, False)
        elif span < periodsperday * 1.25:
            _hour_finder(3, False)
        elif span < periodsperday * 2.5:
            _hour_finder(6, True)
        elif span < periodsperday * 4:
            _hour_finder(12, True)
        else:
            # Day-level labels: month starts in 'maj', day starts in 'min'.
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[day_start] = '%d'
            info_fmt[month_start] = '%d\n%b'
            info_fmt[year_start] = '%d\n%b\n%Y'
            # If no year label applies, put the full format on the first
            # month label, or on the first day label if no month label
            # applies either.
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = '%d\n%b\n%Y'
                else:
                    info_fmt[first_label(month_start)] = '%d\n%b\n%Y'

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, 'month')
        info_maj[month_start] = True
        if freq < FreqGroup.FR_HR:
            # Coarser than hourly: every entry is flagged in 'min'.
            info['min'] = True
        else:
            day_start = period_break(dates_, 'day')
            info['min'][day_start] = True
        week_start = period_break(dates_, 'week')
        year_start = period_break(dates_, 'year')
        info_fmt[week_start] = '%d'
        info_fmt[month_start] = '\n\n%b'
        info_fmt[year_start] = '\n\n%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = '\n\n%b\n%Y'
            else:
                info_fmt[first_label(month_start)] = '\n\n%b\n%Y'

    # Case 3. Less than 14 months ...............
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        week_start = period_break(dates_, 'week')
        info_maj[month_start] = True
        info_min[week_start] = True
        # Clear the 'min' flag again where a year or month starts.
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = '%b\n%Y'

    # Case 4. Less than 2.5 years ...............
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        month_start = period_break(dates_, 'month')
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'

    # Case 5. Less than 4 years .................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only month starts falling in January or July get a month label.
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = '%b'
        info_fmt[year_start] = '%b\n%Y'

    # Case 6. Less than 11 years ................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = '%Y'

    # Case 7. More than 11 years ................
    else:
        year_start = period_break(dates_, 'year')
        year_break = dates_[year_start].year
        nyears = span / periodsperyear
        # Spacing (in years) between flagged year starts comes from the
        # helper; only years divisible by the spacing are kept.
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = '%Y'

    return info
def simple_period_range_series(start, end, freq='D'):
    """
    Return a Series of random values over a period range (test helper).

    The index is ``period_range(start, end, freq=freq)`` and the values
    are drawn with ``np.random.randn``, one per period.
    """
    index = period_range(start, end, freq=freq)
    values = np.random.randn(len(index))
    return Series(values, index=index)
def _simple_period_range_series(start, end, freq='D'):
    """
    Return a Series of random values over a period range (test helper).

    The index is ``period_range(start, end, freq=freq)`` and the values
    are drawn with ``np.random.randn``, one per period.
    """
    periods = period_range(start, end, freq=freq)
    data = np.random.randn(len(periods))
    return Series(data, index=periods)