def test_to_period(self): rng = date_range("1/1/2000", "1/1/2001", freq="D") ts = Series(np.random.randn(len(rng)), index=rng) pts = ts.to_period() exp = ts.copy() exp.index = period_range("1/1/2000", "1/1/2001") tm.assert_series_equal(pts, exp) pts = ts.to_period("M") exp.index = exp.index.asfreq("M") tm.assert_index_equal(pts.index, exp.index.asfreq("M")) tm.assert_series_equal(pts, exp) # GH#7606 without freq idx = DatetimeIndex( ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) exp_idx = PeriodIndex( ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D") s = Series(np.random.randn(4), index=idx) expected = s.copy() expected.index = exp_idx tm.assert_series_equal(s.to_period(), expected) df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) expected = df.copy() expected.index = exp_idx tm.assert_frame_equal(df.to_period(), expected) expected = df.copy() expected.columns = exp_idx tm.assert_frame_equal(df.to_period(axis=1), expected)
def test_to_period_raises(self, index): # https://github.com/pandas-dev/pandas/issues/33327 ser = Series(index=index, dtype=object) if not isinstance(index, DatetimeIndex): msg = f"unsupported Type {type(index).__name__}" with pytest.raises(TypeError, match=msg): ser.to_period()
def periodTs():
    """Print a random daily series re-indexed at quarterly, monthly and weekly periods.

    Builds 365 random values on a daily DatetimeIndex starting 1/1/2000,
    prints that index, then prints the series converted with ``to_period``
    for each of the frequencies 'Q-DEC', 'M' and 'W'.
    """
    daily = Series(
        np.random.randn(365),
        index=pd.date_range('1/1/2000', periods=365, freq='D'),
    )
    print(daily.index)

    for alias in ('Q-DEC', 'M', 'W'):
        print(daily.to_period(freq=alias))
def test_append_concat(self): # #1815 d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC') s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0]
def test_append_concat(self): # #1815 d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0]
def test_append_concat(self): # #1815 d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC') s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) tm.assertIsInstance(result.index, PeriodIndex) self.assertEqual(result.index[0], s1.index[0])
def test_to_period(self):
    """Check ``to_period`` on Series and DataFrame objects with a DatetimeIndex."""
    from pandas.core.indexes.period import period_range

    series = _simple_ts("1/1/2000", "1/1/2001")

    # Default conversion: compared against a period_range over the same span.
    expected_default = series.copy()
    expected_default.index = period_range("1/1/2000", "1/1/2001")
    tm.assert_series_equal(series.to_period(), expected_default)

    # Explicit monthly frequency.
    monthly = series.to_period("M")
    expected_monthly = expected_default.copy()
    expected_monthly.index = expected_default.index.asfreq("M")
    tm.assert_index_equal(monthly.index, expected_monthly.index.asfreq("M"))
    tm.assert_series_equal(monthly, expected_monthly)

    # GH 7606 without freq
    stamps = ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]
    idx = DatetimeIndex(stamps)
    exp_idx = pd.PeriodIndex(stamps, freq="D")

    ser = Series(np.random.randn(4), index=idx)
    expected = ser.copy()
    expected.index = exp_idx
    tm.assert_series_equal(ser.to_period(), expected)

    frame = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)

    # Row axis (the default).
    expected = frame.copy()
    expected.index = exp_idx
    tm.assert_frame_equal(frame.to_period(), expected)

    # Column axis.
    expected = frame.copy()
    expected.columns = exp_idx
    tm.assert_frame_equal(frame.to_period(axis=1), expected)
def maybe_resample(series: Series, ax: Axes, kwargs):
    """Resample ``series`` against the frequency of ``ax`` if necessary.

    Parameters
    ----------
    series : Series
        Data to be drawn; a DatetimeIndex is converted to a PeriodIndex.
    ax : Axes
        Target axes, passed to ``_get_freq`` and ``_upsample_others``.
    kwargs : dict
        Plot keyword arguments; ``"how"`` is popped from it in the
        ``_is_sup`` branch (default ``"last"``).

    Returns
    -------
    tuple
        ``(freq, series)`` after any conversion.

    Raises
    ------
    ValueError
        If no frequency is available, or the series and axes frequencies
        cannot be converted into one another.
    """
    freq, ax_freq = _get_freq(ax, series)

    if freq is None:  # pragma: no cover
        raise ValueError("Cannot use dynamic axis without frequency info")

    # Convert DatetimeIndex to PeriodIndex
    if isinstance(series.index, ABCDatetimeIndex):
        series = series.to_period(freq=freq)

    needs_conversion = ax_freq is not None and freq != ax_freq
    if not needs_conversion:
        return freq, series

    if is_superperiod(freq, ax_freq):  # upsample input
        upsampled = series.copy()
        # error: "Index" has no attribute "asfreq"
        upsampled.index = upsampled.index.asfreq(  # type: ignore[attr-defined]
            ax_freq, how="s"
        )
        return ax_freq, upsampled

    if _is_sup(freq, ax_freq):  # one is weekly
        how = kwargs.pop("how", "last")
        # Two-step resample: first to daily, then to the axes frequency.
        daily = getattr(series.resample("D"), how)().dropna()
        converted = getattr(daily.resample(ax_freq), how)().dropna()
        return ax_freq, converted

    if is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq):
        _upsample_others(ax, freq, kwargs)
        return freq, series

    # pragma: no cover
    raise ValueError("Incompatible frequency conversion")
def test_to_period(self):
    """Check ``to_period`` on Series and DataFrame objects with a DatetimeIndex."""
    # Import from the public namespace rather than the legacy
    # ``pandas.tseries.period`` module, which newer pandas no longer ships.
    from pandas import period_range

    ts = _simple_ts('1/1/2000', '1/1/2001')

    # Default conversion: compared against a period_range over the same span.
    pts = ts.to_period()
    exp = ts.copy()
    exp.index = period_range('1/1/2000', '1/1/2001')
    assert_series_equal(pts, exp)

    # Explicit monthly frequency.
    pts = ts.to_period('M')
    exp.index = exp.index.asfreq('M')
    tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
    assert_series_equal(pts, exp)

    # GH 7606 without freq
    stamps = ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04']
    idx = DatetimeIndex(stamps)
    exp_idx = pd.PeriodIndex(stamps, freq='D')

    s = Series(np.random.randn(4), index=idx)
    expected = s.copy()
    expected.index = exp_idx
    assert_series_equal(s.to_period(), expected)

    df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)

    # Row axis (the default).
    expected = df.copy()
    expected.index = exp_idx
    assert_frame_equal(df.to_period(), expected)

    # Column axis.
    expected = df.copy()
    expected.columns = exp_idx
    assert_frame_equal(df.to_period(axis=1), expected)
def test_to_period(self):
    """Check ``to_period`` on Series and DataFrame objects with a DatetimeIndex."""
    from pandas.core.indexes.period import period_range

    series = _simple_ts('1/1/2000', '1/1/2001')

    # Default conversion: compared against a period_range over the same span.
    expected_default = series.copy()
    expected_default.index = period_range('1/1/2000', '1/1/2001')
    assert_series_equal(series.to_period(), expected_default)

    # Explicit monthly frequency.
    monthly = series.to_period('M')
    expected_monthly = expected_default.copy()
    expected_monthly.index = expected_default.index.asfreq('M')
    tm.assert_index_equal(monthly.index, expected_monthly.index.asfreq('M'))
    assert_series_equal(monthly, expected_monthly)

    # GH 7606 without freq
    stamps = ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04']
    idx = DatetimeIndex(stamps)
    exp_idx = pd.PeriodIndex(stamps, freq='D')

    ser = Series(np.random.randn(4), index=idx)
    expected = ser.copy()
    expected.index = exp_idx
    assert_series_equal(ser.to_period(), expected)

    frame = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)

    # Row axis (the default).
    expected = frame.copy()
    expected.index = exp_idx
    assert_frame_equal(frame.to_period(), expected)

    # Column axis.
    expected = frame.copy()
    expected.columns = exp_idx
    assert_frame_equal(frame.to_period(axis=1), expected)
def test_resample_basic(self): # GH3609 s = Series(range(100), index=date_range( '20130101', freq='s', periods=100, name='idx'), dtype='float') s[10:30] = np.nan index = PeriodIndex([ Period('2013-01-01 00:00', 'T'), Period('2013-01-01 00:01', 'T')], name='idx') expected = Series([34.5, 79.5], index=index) result = s.to_period().resample('T', kind='period').mean() assert_series_equal(result, expected) result2 = s.resample('T', kind='period').mean() assert_series_equal(result2, expected)
def slide11():
    """Demonstrate timestamp/period conversion and a quarterly PeriodIndex.

    Prints each intermediate result. Reads ``../pydata-book/ch08/macrodata.csv``
    relative to the current working directory and replaces its index with a
    PeriodIndex built from the ``year`` and ``quarter`` columns.

    Output uses single-argument ``print(...)`` calls, which behave the same
    as the former print statements on Python 2 and are valid on Python 3.
    """
    rng = pd.date_range('1/1/2000', periods=3, freq='M')
    ts = Series(np.random.randn(3), index=rng)
    pts = ts.to_period()
    print(ts)
    print(pts)

    # Daily timestamps converted to monthly periods.
    rng = pd.date_range('1/29/2000', periods=6, freq='D')
    ts2 = Series(np.random.randn(6), index=rng)
    print(ts2.to_period('M'))

    print('convert back')
    # ``pts`` from above is reused; ``ts`` has not changed since it was built.
    print(pts)
    print(pts.to_timestamp(how='end'))

    print('macrodata')
    data = pd.read_csv('../pydata-book/ch08/macrodata.csv')
    print(data)
    print(data.year)
    print(data.quarter)

    # Combine the separate year and quarter columns into one quarterly index.
    index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
    print(index)
    data.index = index
    print(data.infl)
def test_resample_basic(self): # GH3609 s = Series( range(100), index=date_range("20130101", freq="s", periods=100, name="idx"), dtype="float", ) s[10:30] = np.nan index = PeriodIndex( [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")], name="idx", ) expected = Series([34.5, 79.5], index=index) result = s.to_period().resample("T", kind="period").mean() tm.assert_series_equal(result, expected) result2 = s.resample("T", kind="period").mean() tm.assert_series_equal(result2, expected)
# Notebook-style walkthrough: bare expressions are there to display values.
# NOTE(review): `p` and `Series` are defined before this fragment - confirm.
p
p.asfreq('D', 'start')
p.asfreq('D', 'end')

# Step back one business day from the period's last business day, switch to
# minute frequency at the start of that day, then add 16 * 60 minutes.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()

# Same computation applied to a whole quarterly range.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
ts
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
ts

# Timestamp-indexed series converted to periods.
rng = pd.date_range('1/1/2000', periods=3, freq='M')
from numpy.random import randn
ts = Series(randn(3), index=rng)
pts = ts.to_period()
ts
pts

# Daily timestamps converted to monthly periods.
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(randn(6), index=rng)
ts2.to_period('M')

# And back from periods to timestamps.
pts = ts.to_period()
pts
pts.to_timestamp(how='end')

# Resampling
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = Series(randn(len(rng)), index=rng)
# The `how=` keyword is gone from resample(); call the aggregation method
# on the resampler instead.
ts.resample('M').mean()
ts.resample('M', kind='period').mean()
# print pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h') # print pd.date_range('1/1/2000', periods=10, freq='1h30min') ts = Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M')) # print ts.shift(1) - 1 # print pytz.common_timezones[-5:] rng = pd.period_range('1/1/2000', '6/30/2000', freq='M') # print rng rng = pd.period_range('2010Q3', '2012Q4', freq='Q-JAN') rng = pd.date_range('1/1/2000', periods=3, freq='M') ts = Series(randn(3), index=rng) pts = ts.to_period() # print pts data = pd.read_csv('ch06/macrodata.csv') index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC') data.index = index # print data.infl rng = pd.date_range('1/1/2000', periods=100, freq='D') ts = Series(randn(len(rng)), index=rng) # print ts # print ts.resample('M', how='mean') rng = pd.date_range('1/1/2000', periods=12, freq='T') ts = Series(np.arange(12), index=rng) # print ts.resample('5min', closed='left', label='left').sum()
# NOTE(review): `p` is defined in an earlier cell outside this fragment.
p.asfreq('D', 'start')

# In[39]:
# Step back one business day from the period's last business day, switch to
# minute frequency at the start of that day, then add 16 * 60 minutes.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm

# In[40]:
p4pm.to_timestamp()

# In[41]:
# Daily timestamps converted to monthly periods.
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(np.random.randn(6), index=rng)
ts2.to_period('M')

# In[42]:
# The context manager closes the file handle once the CSV has been read.
with open('F:/电影/数据分析/pydata-book-master/ch08/macrodata.csv') as macrodata_file:
    data = pd.read_csv(macrodata_file)

# In[44]:
data.head()

# In[45]:
# Combine the separate year and quarter columns into one quarterly index.
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
data.index = index
# NOTE(review): `rngb` is defined before this fragment - confirm its frequency.
ts = Series(np.arange(len(rngb)), index=rngb)
print(ts)

# Switch the frequency to minutes, then add 16 hours:
# 4 PM on the business day before the last business day of each period.
new_rngb = (rngb.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rngb.to_timestamp()
print(ts)

# Converting Timestamp to Period (and back).
# With to_period, Series and DataFrame objects indexed by timestamps
# can be converted to a period index.
rngc = pd.date_range('1/1/2000', periods=3, freq='M')
print(rngc)
tsc = Series(np.random.randn(3), index=rngc)
print(tsc)
pts = tsc.to_period()
print(pts)

# Daily timestamps converted to monthly periods.
rngd = pd.date_range('1/29/2000', periods=6, freq="D")
ts3 = Series(np.random.randn(6), index=rngd)
print(ts3.to_period('M'))

# To convert back to timestamps, use to_timestamp.
pts = ts3.to_period()
print(pts)
print(pts.to_timestamp(how='end'))

data = pd.read_csv('/home/jethro/文档/pydata-book-master/data/macrodata.csv')
print(data.year)
print(data.quarter)

# Passing these two arrays plus a frequency to PeriodIndex combines them
# into a single index for the DataFrame.
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
# NOTE(review): `p` is defined before this fragment.
# Step back one business day from the period's last business day, switch to
# minute frequency at the start of that day, then add 16 * 60 minutes.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
print(p4pm)

# Same computation applied to a whole quarterly range.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
print(ts)
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
print("This is ts", ts)

# 11.5.3 Converting timestamps to periods (and back)
rng = pd.date_range('2000-01-01', periods=3, freq='M')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts)
pts = ts.to_period()
print(pts)

# Daily timestamps converted to monthly periods.
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(np.random.randn(6), index=rng)
print(ts2)
print(ts2.to_period('M'))

# Convert to periods with no explicit frequency, then back to timestamps.
pts = ts2.to_period()
print(pts)
print(pts.to_timestamp(how='end'))

# 11.5.4 Creating a PeriodIndex from arrays
data = pd.read_csv(
    'D:\\test\\pydata-book-2nd-edition\\examples\\macrodata.csv')
# Remove the display limits on columns and rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from pandas import DataFrame, Series
import pandas as pd
import numpy as np

# Build a month-by-fund table of returns from a whitespace-separated text
# file and write it to 'fund_return.csv'.

# Raw string for the regex separator: '\s' in a plain literal is an invalid
# escape sequence and triggers a warning on current Python versions.
res = pd.read_table('equity_funds_ret.txt', sep=r'\s+', skiprows=[1, 2])

# Distinct fund ids in value_counts order. The Series method replaces the
# deprecated top-level pd.value_counts helper.
funds = list(res['wficn'].value_counts().index)
# funds.remove(100218)

min_date = 20160331
max_date = 20160331
# for fundID in funds:
#     temp = res[res['wficn']==fundID].values[0,1]  # the indexing here is not quite right
#     if temp < min_date:
#         min_date = temp
# The start date is hard-coded instead of being derived by the loop above.
min_date = 19611229

date_index = pd.period_range(start=str(int(min_date)), end=str(int(max_date)), freq='M')
data = DataFrame(None, index=date_index, columns=funds)

for fundID in funds:
    # Rows for this fund, with incomplete rows dropped.
    temp = res[res['wficn'] == fundID].dropna()
    temp_dateIndex = pd.to_datetime(temp['date'], format='%Y%m%d')
    temp_series = Series(temp['ret'].values, index=temp_dateIndex)
    # Convert to monthly periods; the assignment aligns on `date_index`.
    temp_series = temp_series.to_period(freq='M')
    data[fundID] = temp_series

data.to_csv('fund_return.csv')
# Quarterly period frequencies.
# 'Q-JAN' means the fiscal year ends in January 2012, so the fourth quarter
# 2012Q4 runs from November 2011 through January 2012.
p3 = pd.Period('2012Q4', freq='Q-JAN')

# Timestamp for 4 PM on the second-to-last business day of the quarter.
# NOTE(review): this uses `p`, not the `p3` defined just above; `p` must come
# from earlier in the file - confirm which period is intended.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
# Convert to a Timestamp.
p4pm.to_timestamp()

# Generate a quarterly range.
pr1 = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts4 = Series(np.arange(len(pr1)), index=pr1)

# Converting Timestamp to Period.
pr2 = pd.date_range('1/1/2000', periods=3, freq='M')
ts5 = Series(np.random.randn(3), index=pr2)
# Convert a timestamp-indexed Series or DataFrame to a period index.
ts5.to_period()
# Convert back to timestamps. to_timestamp() needs a PeriodIndex, so it is
# called on the converted series; on `ts5` itself (DatetimeIndex) it raises.
ts5.to_period().to_timestamp()

# Change the frequency from daily to monthly.
pr3 = pd.date_range('1/29/2000', periods=6, freq='D')
ts6 = Series(np.random.randn(6), index=pr3)
# Several daily timestamps fall in the same month, so the monthly result
# contains repeated periods.
ts6.to_period('M')

# Creating a PeriodIndex from arrays.
# In this dataset year and quarter are stored in separate columns; the two
# columns are combined here.
data = pd.read_csv('macrodata.csv')
# Passing the two arrays plus a frequency to PeriodIndex yields an index
# for the DataFrame.
index9 = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
data.index = index9
# In[180]: rng # In[181]: ts = Series(np.random.randn(3), index=rng) # In[182]: ts # In[183]: pts = ts.to_period() # In[184]: pts # 时期指的是非重叠区间,因此对于给定的频率,一个时间戳只能属于一个时期 # In[185]: rng = pd.date_range('1/29/2000', periods=6, freq='D') # In[186]: rng
# * to_period() <==> to_timestamp() # In[ ]: rng = pd.date_range('1/1/2000', periods=3, freq='M') ts = Series(randn(3), index=rng) ts # #### # to_period()에 의해 변환되는 빈도(freq.)는 추정된다 # In[ ]: pts = ts.to_period() pts # #### # 빈도를 지정할 수도 있음 # * 중복되는 시간 인덱스가 나타날 수 있음 # In[ ]: rng = pd.date_range('1/29/2000', periods=6, freq='D') ts2 = Series(randn(6), index=rng) ts2 ts2.to_period('M')
# NOTE(review): `now` and `MonthEnd` are defined before this fragment.
# Add one, then two, month-end offsets to `now`.
print(now + MonthEnd())
print(now + MonthEnd(2))

# Roll `now` forward and backward with the same offset.
offset = MonthEnd()
print(offset.rollforward(now))
print(offset.rollback(now))

# 20 observations spaced 4 days apart, averaged two ways: grouped by the
# rolled-forward date of each timestamp, and resampled at 'M'.
ts = Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
print(ts.groupby(offset.rollforward).mean())
print(ts.resample('M').mean())

# Timestamp-indexed series converted to periods.
rng = pd.date_range('1/1/2000', periods=3, freq='M')
ts = Series(np.random.randn(3), index=rng)
pts = ts.to_period()
print(ts)
print(pts)

# Daily timestamps converted to monthly periods.
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(np.random.randn(6), index=rng)
print(ts2.to_period('M'))

# Periods converted back to timestamps.
pts = ts.to_period()
print(pts)
print(pts.to_timestamp(how='end'))

# Build a quarterly PeriodIndex from the year and quarter columns.
data = pd.read_csv('macrodata.csv')
print(data.year)
print(data.quarter)
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')