def test_at_time(self):
    """Check ``datetime.time`` lookups against an explicit hour/minute mask.

    A 5-minute index spanning several days is indexed with ``time(9, 30)``
    on both a Series and a DataFrame; the result must equal the rows chosen
    by a boolean mask on the index's hour and minute.  A daily index is then
    used to check that ``at_time(time(0, 0))`` returns the whole Series.
    """
    rng = date_range('1/1/2000', '1/5/2000', freq='5min')
    ts = Series(np.random.randn(len(rng)), index=rng)
    df = DataFrame(np.random.randn(len(rng), 3), index=rng)

    # Rows whose timestamp falls at 09:30, whatever the date.
    at_0930 = (rng.hour == 9) & (rng.minute == 30)

    result = ts[time(9, 30)]
    # ``.loc`` is used instead of the ``.ix`` indexer, which pandas has
    # deprecated and removed; both lookups here are label-based.
    result_df = df.loc[time(9, 30)]
    expected = ts[at_0930]
    exp_df = df[at_0930]

    assert_series_equal(result, expected)
    tm.assert_frame_equal(result_df, exp_df)

    # The same lookup on a tail slice of the frame must yield the last
    # row of the full-frame result.
    chunk = df.loc['1/4/2000':]
    result = chunk.loc[time(9, 30)]
    expected = result_df[-1:]
    tm.assert_frame_equal(result, expected)

    # midnight, everything
    rng = date_range('1/1/2000', '1/31/2000')
    ts = Series(np.random.randn(len(rng)), index=rng)
    result = ts.at_time(time(0, 0))
    assert_series_equal(result, ts)
def test_at_time(self):
    """Exercise ``at_time`` and ``datetime.time`` indexing on 5-minute data.

    Covers a timestamp argument, a string argument, ``[]``/``.loc`` lookups
    with a ``time`` key on Series and DataFrame, a daily index where every
    row sits at midnight, and a time of day that never occurs in the index.
    """
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    ser = Series(np.random.randn(len(index)), index=index)

    # Timestamp argument: every selected row shares its hour/minute/second.
    stamp = index[1]
    picked = ser.at_time(stamp)
    assert (picked.index.hour == stamp.hour).all()
    assert (picked.index.minute == stamp.minute).all()
    assert (picked.index.second == stamp.second).all()

    # The string and datetime.time spellings of 09:30 must agree.
    from_string = ser.at_time("9:30")
    from_time = ser.at_time(time(9, 30))
    tm.assert_series_equal(from_string, from_time)

    frame = DataFrame(np.random.randn(len(index), 3), index=index)
    half_past_nine = time(9, 30)
    is_0930 = (index.hour == 9) & (index.minute == 30)

    ser_result = ser[half_past_nine]
    frame_result = frame.loc[half_past_nine]
    ser_expected = ser[is_0930]
    frame_expected = frame[is_0930]

    # Clear the freq on the looked-up index before comparing it with the
    # mask-selected expectation.
    ser_result.index = ser_result.index._with_freq(None)
    tm.assert_series_equal(ser_result, ser_expected)
    tm.assert_frame_equal(frame_result, frame_expected)

    # Same lookup on a tail slice of the frame.
    tail = frame.loc["1/4/2000":]
    tail_result = tail.loc[half_past_nine]
    tail_expected = frame_result[-1:]
    # Without resetting the freqs, these are 5 min and 1440 min, respectively
    tail_result.index = tail_result.index._with_freq(None)
    tail_expected.index = tail_expected.index._with_freq(None)
    tm.assert_frame_equal(tail_result, tail_expected)

    # midnight, everything
    daily = date_range("1/1/2000", "1/31/2000")
    daily_ser = Series(np.random.randn(len(daily)), index=daily)
    tm.assert_series_equal(daily_ser.at_time(time(0, 0)), daily_ser)

    # time doesn't exist
    odd_index = date_range("1/1/2012", freq="23Min", periods=384)
    odd_ser = Series(np.random.randn(len(odd_index)), odd_index)
    nothing = odd_ser.at_time("16:00")
    assert len(nothing) == 0
def test_at_time(self):
    """Exercise ``at_time`` and ``datetime.time`` indexing on 5-minute data.

    Checks a timestamp argument, a string argument, ``[]``/``.loc`` lookups
    with a ``time`` key, a daily index where every row sits at midnight, and
    a time of day that never occurs in the index.
    """
    five_min = date_range("1/1/2000", "1/5/2000", freq="5min")
    series = Series(np.random.randn(len(five_min)), index=five_min)

    # Timestamp argument: selection matches its hour, minute and second.
    second_stamp = five_min[1]
    selected = series.at_time(second_stamp)
    for field in ("hour", "minute", "second"):
        assert (getattr(selected.index, field) == getattr(second_stamp, field)).all()

    # String argument and datetime.time argument give the same rows.
    tm.assert_series_equal(series.at_time("9:30"), series.at_time(time(9, 30)))

    frame = DataFrame(np.random.randn(len(five_min), 3), index=five_min)
    key = time(9, 30)
    mask_0930 = (five_min.hour == 9) & (five_min.minute == 30)

    series_at_key = series[key]
    frame_at_key = frame.loc[key]
    tm.assert_series_equal(series_at_key, series[mask_0930])
    tm.assert_frame_equal(frame_at_key, frame[mask_0930])

    # Lookup on a tail slice must equal the last row of the full lookup.
    tail = frame.loc["1/4/2000":]
    tm.assert_frame_equal(tail.loc[key], frame_at_key[-1:])

    # midnight, everything
    daily = date_range("1/1/2000", "1/31/2000")
    daily_series = Series(np.random.randn(len(daily)), index=daily)
    tm.assert_series_equal(daily_series.at_time(time(0, 0)), daily_series)

    # time doesn't exist
    odd = date_range("1/1/2012", freq="23Min", periods=384)
    odd_series = Series(np.random.randn(len(odd)), odd)
    empty = odd_series.at_time("16:00")
    assert len(empty) == 0
def test_at_time(self):
    """Exercise ``at_time`` and ``datetime.time`` indexing on 5-minute data.

    Checks a timestamp argument, a string argument, ``[]``/``.loc`` lookups
    with a ``time`` key, a daily index where every row sits at midnight, and
    a time of day that never occurs in the index.
    """
    stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
    values = Series(np.random.randn(len(stamps)), index=stamps)

    # Timestamp argument: selection matches its hour, minute and second.
    probe = stamps[1]
    at_probe = values.at_time(probe)
    self.assertTrue((at_probe.index.hour == probe.hour).all())
    self.assertTrue((at_probe.index.minute == probe.minute).all())
    self.assertTrue((at_probe.index.second == probe.second).all())

    # String argument and datetime.time argument give the same rows.
    by_string = values.at_time('9:30')
    by_time = values.at_time(time(9, 30))
    assert_series_equal(by_string, by_time)

    table = DataFrame(np.random.randn(len(stamps), 3), index=stamps)
    nine_thirty = time(9, 30)
    mask = (stamps.hour == 9) & (stamps.minute == 30)

    series_hit = values[nine_thirty]
    table_hit = table.loc[nine_thirty]
    assert_series_equal(series_hit, values[mask])
    tm.assert_frame_equal(table_hit, table[mask])

    # Lookup on a tail slice must equal the last row of the full lookup.
    tail = table.loc['1/4/2000':]
    tm.assert_frame_equal(tail.loc[nine_thirty], table_hit[-1:])

    # midnight, everything
    days = date_range('1/1/2000', '1/31/2000')
    daily_values = Series(np.random.randn(len(days)), index=days)
    assert_series_equal(daily_values.at_time(time(0, 0)), daily_values)

    # time doesn't exist
    uneven = date_range('1/1/2012', freq='23Min', periods=384)
    uneven_values = Series(np.random.randn(len(uneven)), uneven)
    missing = uneven_values.at_time('16:00')
    self.assertEqual(len(missing), 0)
def test_localized_at_time(self, tzstr):
    """``at_time`` on a tz-localized Series equals localizing the naive result.

    ``tzstr`` is the timezone name supplied to the test; the selected
    index must also still carry that timezone.
    """
    expected_tz = timezones.maybe_get_tz(tzstr)
    ten_am = time(10, 0)

    hourly = date_range("4/16/2012", "5/1/2012", freq="H")
    naive = Series(np.random.randn(len(hourly)), index=hourly)
    localized = naive.tz_localize(tzstr)

    # Select-then-localize and localize-then-select must agree.
    result = localized.at_time(ten_am)
    expected = naive.at_time(ten_am).tz_localize(tzstr)
    tm.assert_series_equal(result, expected)

    assert timezones.tz_compare(result.index.tz, expected_tz)
def test_localized_at_time_between_time(self):
    """``at_time``/``between_time`` on a US/Eastern-localized Series.

    For each selector, selecting on the localized Series must equal
    selecting on the naive Series and localizing afterwards, and the
    result's index must report the ``US/Eastern`` zone.
    """
    from datetime import time

    rng = date_range('4/16/2012', '5/1/2012', freq='H')
    ts = Series(np.random.randn(len(rng)), index=rng)

    ts_local = ts.tz_localize('US/Eastern')

    result = ts_local.at_time(time(10, 0))
    expected = ts.at_time(time(10, 0)).tz_localize('US/Eastern')
    assert_series_equal(result, expected)
    # ``assertEqual`` replaces ``assert_``, a deprecated unittest alias
    # that no longer exists on recent Python versions.
    self.assertEqual(result.index.tz.zone, 'US/Eastern')

    t1, t2 = time(10, 0), time(11, 0)
    result = ts_local.between_time(t1, t2)
    expected = ts.between_time(t1, t2).tz_localize('US/Eastern')
    assert_series_equal(result, expected)
    self.assertEqual(result.index.tz.zone, 'US/Eastern')
def test_localized_at_time_between_time(self):
    """``at_time``/``between_time`` on a Series localized to US/Eastern.

    The timezone name is obtained from ``self.tzstr`` and the timezone
    object from ``self.tz``; ``self.cmptz`` compares the result's timezone
    with the expected one.
    """
    from datetime import time

    eastern = self.tzstr("US/Eastern")

    hourly = date_range("4/16/2012", "5/1/2012", freq="H")
    naive = Series(np.random.randn(len(hourly)), index=hourly)
    localized = naive.tz_localize(eastern)

    # at_time: select-then-localize must match localize-then-select.
    ten = time(10, 0)
    at_result = localized.at_time(ten)
    at_expected = naive.at_time(ten).tz_localize(eastern)
    tm.assert_series_equal(at_result, at_expected)
    self.assertTrue(self.cmptz(at_result.index.tz, self.tz("US/Eastern")))

    # between_time: the same equivalence over the 10:00-11:00 window.
    start, stop = time(10, 0), time(11, 0)
    between_result = localized.between_time(start, stop)
    between_expected = naive.between_time(start, stop).tz_localize(eastern)
    tm.assert_series_equal(between_result, between_expected)
    self.assertTrue(self.cmptz(between_result.index.tz, self.tz("US/Eastern")))
def test_localized_at_time_between_time(self, tzstr):
    """``at_time``/``between_time`` on a Series localized to ``tzstr``.

    For each selector, selecting on the localized Series must equal
    selecting on the naive Series and localizing afterwards, and the
    result's index timezone must compare equal to the expected one.
    """
    from datetime import time

    expected_tz = timezones.maybe_get_tz(tzstr)

    hourly = date_range('4/16/2012', '5/1/2012', freq='H')
    naive = Series(np.random.randn(len(hourly)), index=hourly)
    localized = naive.tz_localize(tzstr)

    # at_time at 10:00.
    ten = time(10, 0)
    at_result = localized.at_time(ten)
    at_expected = naive.at_time(ten).tz_localize(tzstr)
    tm.assert_series_equal(at_result, at_expected)
    assert timezones.tz_compare(at_result.index.tz, expected_tz)

    # between_time over the 10:00-11:00 window.
    start, stop = time(10, 0), time(11, 0)
    between_result = localized.between_time(start, stop)
    between_expected = naive.between_time(start, stop).tz_localize(tzstr)
    tm.assert_series_equal(between_result, between_expected)
    assert timezones.tz_compare(between_result.index.tz, expected_tz)
def test_at_time_raises(self):
    """``at_time`` on a Series built without a datetime index raises ``TypeError``.

    Regression test for GH20725.
    """
    letters = Series(["a", "b", "c"])
    with pytest.raises(TypeError, match="Index must be DatetimeIndex"):
        letters.at_time("00:00")
from datetime import time

import numpy as np
import pandas as pd  # the script calls pd.date_range, so pandas itself must be imported
from pandas import Series, DataFrame

# Time-of-day and "most recent value" selection.
# For a long intraday market series, pick out the value at a particular time
# of each day; irregular data has to be regularised first.
# 'min' is the minute frequency alias (spelled 'T' in older pandas code).
rng = pd.date_range('2012-06-01 09:30', '2012-06-06 15:59', freq='min')
ts = Series(np.arange(len(rng), dtype=float), index=rng)
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(ts)

# Values at 10:00 on every day.
print(ts[time(10, 0)])
# Equivalent: the at_time instance method.
print(ts.at_time(time(10, 0)))

# Select the values lying between two time objects: 10:00 through 10:01.
print(ts.between_time(time(10, 0), time(10, 1)))

# Getting the last value seen before 10:00: first blank out every position
# of a random permutation except its first 700 entries, leaving an irregular
# series.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
# The indexer holds integer positions, so assign through .iloc rather than
# relying on [] falling back to positional access on a datetime index.
irr_ts.iloc[indexer] = np.nan
print(irr_ts['2012-06-01 09:50': '2012-06-01 10:00'])

# Passing a set of timestamps to asof yields the valid (non-NA) value
# at or before each of those times.
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
print(irr_ts.asof(selection))
# NOTE(review): infl_q, gdp, pd, np, Series and time are expected to be
# defined earlier in the script, outside this excerpt.
print(infl_q)
# Align to the GDP index, forward-filling the missing entries.
k = infl_q.reindex(gdp.index, method='ffill')
print(k)

# Time-of-day and "most recent value" selection.
# One trading session sampled every minute ('min' is the minute alias,
# spelled 'T' in older pandas code).
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='min')
# Append the same session shifted by 1, 2 and 3 business days
# (four sessions in total).
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts.head())
print(ts.tail())

# Indexing with a datetime.time object extracts the values at that time
# of day.
print(ts[time(10, 0)])  # the 10:00 values
print(ts.at_time(time(10, 0)))

# between_time selects the values lying between two time objects.
j = ts.between_time(time(10, 0), time(10, 1))  # the 10:00-10:01 window
print(j)

# When no observation falls exactly on a given time, we may want the last
# value seen before 10:00.
# Take a random permutation of the positions, drop its first 700 entries
# and sort the rest.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
# Blank out those positions.  The indexer holds integer positions, so assign
# through .iloc rather than relying on [] falling back to positional access
# on a datetime index.
irr_ts.iloc[indexer] = np.nan
print(irr_ts['2012-06-01 09:50':'2012-06-01 10:00'])

# Passing a set of timestamps to asof gives the valid value at each of them.
# 10:00 on four consecutive business days:
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
# Rerun the random step above until one of the 10:00 values is NA; asof
# would then fill it with the most recent valid value.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46], index=pd.period_range('1984Q2', periods=7, freq='Q-SEP')) inf1 = Series([0.025, 0.045, 0.037, 0.04], index=pd.period_range('1982', periods=4, freq='A-DEC')) inf1_q = inf1.asfreq('Q-SEP', how='end') print(inf1_q) print(inf1_q.reindex(gdp.index, method='ffill')) rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T') rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)]) ts = Series(np.arange(len(rng), dtype=float), index=rng) print(ts) print(ts[time(10, 0)]) print(ts.at_time(time(10, 0))) print(ts.between_time(time(10, 0), time(10, 1))) indexer = np.sort(np.random.permutation(len(ts))[700:]) irr_ts = ts.copy() irr_ts[indexer] = np.nan print(irr_ts['2012-06-01 09:50':'2012-06-01 10:00']) selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B') print(irr_ts.asof(selection)) data1 = DataFrame(np.ones((6, 3), dtype=float), columns=['a', 'b', 'c'], index=pd.date_range('6/12/2012', periods=6)) data2 = DataFrame(np.ones((6, 3), dtype=float) * 2, columns=['a', 'b', 'c'],