def test_asfreq(self):
    """Round-trip a month-end series through business-day frequency and back."""
    month_ends = [datetime(2009, 10, 30), datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
    ts = Series([0., 1., 2.], index=month_ends)

    # String aliases without filling, string aliases with padding, and
    # offset objects: every round trip is expected to reproduce ``ts``.
    assert_series_equal(ts.asfreq('B').asfreq('BM'), ts)
    assert_series_equal(ts.asfreq('B', method='pad').asfreq('BM'), ts)
    assert_series_equal(ts.asfreq(BDay()).asfreq(BMonthEnd()), ts)

    # An empty slice converts to an empty result that is a distinct object.
    empty = ts[:0].asfreq('M')
    self.assertEqual(len(empty), 0)
    self.assertIsNot(empty, ts)

    # With fill_value=-1 the test expects 60 filled days plus the 3 originals.
    filled = ts.asfreq('D', fill_value=-1)
    counts = filled.value_counts().sort_index()
    wanted = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
    assert_series_equal(counts, wanted)
def test_tz_aware_asfreq(self, tz): dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz) ser = Series(np.random.randn(len(dr)), index=dr) # it works! ser.asfreq('T')
def test_tz_aware_asfreq(self):
    """Smoke test: minute-frequency asfreq on a US/Eastern daily series."""
    eastern = self.tzstr("US/Eastern")
    index = date_range("2011-12-01", "2012-07-20", freq="D", tz=eastern)
    values = np.random.randn(len(index))
    daily = Series(values, index=index)

    # it works!
    daily.asfreq("T")
def test_tz_aware_asfreq(self): dr = date_range('2011-12-01','2012-07-20',freq = 'D', tz = 'US/Eastern') s = Series(np.random.randn(len(dr)), index=dr) # it works! s.asfreq('T')
def test_asfreq_non_unique():
    """asfreq on an index with duplicated timestamps must raise ValueError."""
    # GH #1077
    doubled = date_range('1/1/2000', '2/29/2000').repeat(2).values
    ts = Series(np.random.randn(len(doubled)), index=doubled)

    with pytest.raises(ValueError,
                       match='cannot reindex from a duplicate axis'):
        ts.asfreq('B')
def test_asfreq_ts(self):
    """Series/DataFrame asfreq on a PeriodIndex must match PeriodIndex.asfreq."""
    index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010')
    size = len(index)
    ts = Series(np.random.randn(size), index=index)
    df = DataFrame(np.random.randn(size, 3), index=index)

    # Conversion anchored at the end of each annual period.
    ts_end = ts.asfreq('D', how='end')
    df_end = df.asfreq('D', how='end')
    end_index = index.asfreq('D', how='end')
    assert len(ts_end) == len(ts)
    tm.assert_index_equal(ts_end.index, end_index)
    tm.assert_index_equal(df_end.index, end_index)

    # Conversion anchored at the start of each annual period.
    ts_start = ts.asfreq('D', how='start')
    assert len(ts_start) == len(ts)
    tm.assert_index_equal(ts_start.index, index.asfreq('D', how='start'))
def test_upsample_with_limit(self):
    """Limited forward fill while upsampling annual periods to monthly."""
    years = period_range("1/1/2000", periods=5, freq="A")
    annual = Series(np.random.randn(len(years)), years)

    upsampled = annual.resample("M", fill_method="ffill", limit=2,
                                convention="end")
    reference = annual.asfreq("M").reindex(upsampled.index, method="ffill",
                                           limit=2)
    assert_series_equal(upsampled, reference)
def test_annual_upsample(self):
    """Upsample annual period series and compare with timestamp-based results."""
    targets = ["D", "B", "M"]
    conventions = ["start", "end"]
    fill_methods = ["ffill", "bfill"]

    for month in MONTHS:
        ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

        # Same iteration order as product(targets, conventions, fill_methods).
        for targ in targets:
            for conv in conventions:
                for meth in fill_methods:
                    result = ts.resample(targ, fill_method=meth,
                                         convention=conv)
                    via_timestamps = result.to_timestamp(targ, how=conv)
                    expected = via_timestamps.asfreq(targ, meth).to_period()
                    assert_series_equal(result, expected)

    # A DataFrame column must resample like the equivalent Series
    # (``ts`` is the series left over from the last loop iteration).
    df = DataFrame({"a": ts})
    frame_result = df.resample("D", fill_method="ffill")
    series_result = df["a"].resample("D", fill_method="ffill")
    assert_series_equal(frame_result["a"], series_result)

    # Annual -> monthly with forward fill.
    years = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=years)
    result = ts.resample("M", fill_method="ffill")
    months = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
    assert_series_equal(result, expected)
def test_asfreq(self):
    """Round-trip a month-end series through weekday frequency and back."""
    month_ends = [datetime(2009, 10, 30), datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
    ts = Series([0., 1., 2.], index=month_ends)

    # String rules, no filling.
    roundtrip = ts.asfreq('WEEKDAY').asfreq('EOM')
    self.assert_(np.array_equal(roundtrip, ts))

    # String rules, padding while upsampling.
    roundtrip = ts.asfreq('WEEKDAY', method='pad').asfreq('EOM')
    self.assert_(np.array_equal(roundtrip, ts))

    # Offset objects taken from ``datetools``.
    roundtrip = ts.asfreq(datetools.bday).asfreq(datetools.bmonthEnd)
    self.assert_(np.array_equal(roundtrip, ts))
def test_annual_upsample(self):
    """Upsample annual period series and compare with timestamp-based results."""
    targets = ['D', 'B', 'M']
    combos = product(targets, ['start', 'end'], ['ffill', 'bfill'])
    # Materialised once so the same combinations are replayed for every month.
    combos = list(combos)

    for month in MONTHS:
        ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

        for targ, conv, meth in combos:
            result = ts.resample(targ, fill_method=meth, convention=conv)
            stamped = result.to_timestamp(targ, how=conv)
            expected = stamped.asfreq(targ, meth).to_period()
            assert_series_equal(result, expected)

    # A DataFrame column must resample like the equivalent Series
    # (``ts`` is the series left over from the last loop iteration).
    df = DataFrame({'a' : ts})
    frame_result = df.resample('D', fill_method='ffill')
    series_result = df['a'].resample('D', fill_method='ffill')
    assert_series_equal(frame_result['a'], series_result)

    # Annual -> monthly with forward fill.
    years = period_range('2000', '2003', freq='A-DEC')
    ts = Series([1, 2, 3, 4], index=years)
    result = ts.resample('M', fill_method='ffill')
    months = period_range('2000-01', '2003-12', freq='M')
    expected = ts.asfreq('M', how='start').reindex(months, method='ffill')
    assert_series_equal(result, expected)
def test_upsample_with_limit(self):
    """Limited forward fill while upsampling annual periods to monthly."""
    years = period_range("1/1/2000", periods=5, freq="A")
    annual = Series(np.random.randn(len(years)), years)

    upsampled = annual.resample("M", convention="end").ffill(limit=2)
    monthly = annual.asfreq("M")
    reference = monthly.reindex(upsampled.index, method="ffill", limit=2)
    tm.assert_series_equal(upsampled, reference)
def test_upsample_with_limit(self):
    """Limited forward fill while upsampling annual periods to monthly."""
    years = period_range('1/1/2000', periods=5, freq='A')
    annual = Series(np.random.randn(len(years)), years)

    upsampled = annual.resample('M', convention='end').ffill(limit=2)
    monthly = annual.asfreq('M')
    reference = monthly.reindex(upsampled.index, method='ffill', limit=2)
    assert_series_equal(upsampled, reference)
def test_asfreq_normalize(self): rng = date_range('1/1/2000 09:30', periods=20) norm = date_range('1/1/2000', periods=20) vals = np.random.randn(20) ts = Series(vals, index=rng) result = ts.asfreq('D', normalize=True) norm = date_range('1/1/2000', periods=20) expected = Series(vals, index=norm) assert_series_equal(result, expected) vals = np.random.randn(20, 3) ts = DataFrame(vals, index=rng) result = ts.asfreq('D', normalize=True) expected = DataFrame(vals, index=norm) assert_frame_equal(result, expected)
def test_asfreq(self):
    """Round-trip a month-end series through business-day frequency and back."""
    month_ends = [datetime(2009, 10, 30), datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
    ts = Series([0., 1., 2.], index=month_ends)

    # String aliases without filling, string aliases with padding, and the
    # ``datetools`` offset objects: each round trip must reproduce ``ts``.
    roundtrip = ts.asfreq('B').asfreq('BM')
    self.assert_series_equal(roundtrip, ts)

    roundtrip = ts.asfreq('B', method='pad').asfreq('BM')
    self.assert_series_equal(roundtrip, ts)

    roundtrip = ts.asfreq(datetools.bday).asfreq(datetools.bmonthEnd)
    self.assert_series_equal(roundtrip, ts)

    # An empty slice converts to an empty result that is a distinct object.
    empty = ts[:0].asfreq('M')
    self.assertEqual(len(empty), 0)
    self.assertIsNot(empty, ts)
def test_asfreq_fillvalue(self):
    """fill_value must fill only the rows created by upsampling."""
    # test for fill value during upsampling, related to issue 3715

    # setup
    index = date_range("1/1/2016", periods=10, freq="2S")
    ts = Series(np.arange(len(index)), index=index)
    df = DataFrame({"one": ts})

    # insert pre-existing missing value
    gap = "2016-01-01 00:00:08"
    df.loc[gap, "one"] = None

    # DataFrame: the pre-existing gap is put back into the expected frame,
    # i.e. fill_value is expected to leave it missing.
    actual_df = df.asfreq(freq="1S", fill_value=9.0)
    expected_df = df.asfreq(freq="1S").fillna(9.0)
    expected_df.loc[gap, "one"] = None
    tm.assert_frame_equal(expected_df, actual_df)

    # Series without gaps: fill_value is equivalent to a later fillna.
    expected_series = ts.asfreq(freq="1S").fillna(9.0)
    actual_series = ts.asfreq(freq="1S", fill_value=9.0)
    tm.assert_series_equal(expected_series, actual_series)
def test_mixed_freq_alignment(self):
    """Hourly and minutely plots on one axes must start at the same x value."""
    hourly_index = date_range("2012-01-01 13:00", "2012-01-02", freq="H")
    hourly = Series(np.random.randn(12), index=hourly_index)
    minutely = hourly.asfreq("T").interpolate()

    ax = hourly.plot()
    minutely.plot(style="r")

    first_x = ax.lines[0].get_xdata()[0]
    second_x = ax.lines[1].get_xdata()[0]
    self.assertEqual(first_x, second_x)
def test_mixed_freq_alignment(self):
    """Hourly and minutely plots on one axes must start at the same x value."""
    hourly_index = date_range('2012-01-01 13:00', '2012-01-02', freq='H')
    hourly = Series(np.random.randn(12), index=hourly_index)
    minutely = hourly.asfreq('T').interpolate()

    ax = hourly.plot()
    minutely.plot(style='r')

    first_x = ax.lines[0].get_xdata()[0]
    second_x = ax.lines[1].get_xdata()[0]
    assert first_x == second_x
def test_resample_weekly_all_na(self):
    """Resampling W-WED data onto W-THU: all-NA unfilled, asfreq-like when filled."""
    wednesdays = date_range('1/1/2000', periods=10, freq='W-WED')
    ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

    unfilled = ts.resample('W-THU')
    self.assert_(unfilled.isnull().all())

    # The last resampled point is dropped before comparing with asfreq.
    filled = ts.resample('W-THU', fill_method='ffill')[:-1]
    reference = ts.asfreq('W-THU', method='ffill')
    assert_series_equal(filled, reference)
def test_resample_weekly_all_na(self):
    """Resampling W-WED data onto W-THU: all-NA unfilled, asfreq-like when filled."""
    wednesdays = date_range("1/1/2000", periods=10, freq="W-WED")
    values = np.random.randn(len(wednesdays))
    ts = Series(values, index=wednesdays)

    self.assert_(ts.resample("W-THU").isnull().all())

    # The last resampled point is dropped before comparing with asfreq.
    filled = ts.resample("W-THU", fill_method="ffill")
    trimmed = filled[:-1]
    assert_series_equal(trimmed, ts.asfreq("W-THU", method="ffill"))
def test_resample_weekly_all_na(self): rng = date_range("1/1/2000", periods=10, freq="W-WED") ts = Series(np.random.randn(len(rng)), index=rng) result = ts.resample("W-THU").asfreq() assert result.isna().all() result = ts.resample("W-THU").asfreq().ffill()[:-1] expected = ts.asfreq("W-THU").ffill() assert_series_equal(result, expected)
def make_ts(self, v):
    """Build a daily, forward-padded Series from a nested x/y payload.

    The timestamps are read from ``v['data']['x']['data']`` and parsed with
    ``parser.parse``; the values are read from ``v['data']['y'][0]['data']``
    and converted with ``float``.
    """
    timestamps = [parser.parse(raw) for raw in v['data']['x']['data']]
    values = [float(raw) for raw in v['data']['y'][0]['data']]
    return Series(values, index=timestamps).asfreq('D', method='pad')
def test_monthly_convention_span(self):
    """Monthly -> daily resample().mean() compared with a back-filled asfreq."""
    months = period_range("2000-01", periods=3, freq="M")
    ts = Series(np.arange(3), index=months)

    # hacky way to get same thing
    days = period_range("2000-01-01", "2000-03-31", freq="D")
    month_end_values = ts.asfreq("D", how="end")
    expected = month_end_values.reindex(days).fillna(method="bfill")

    result = ts.resample("D").mean()
    tm.assert_series_equal(result, expected)
def test_mixed_freq_alignment(self):
    """Hourly and minutely plots on one axes must start at the same x value."""
    import matplotlib.pyplot as plt

    hourly_index = date_range('2012-01-01 13:00', '2012-01-02', freq='H')
    hourly = Series(np.random.randn(12), index=hourly_index)
    minutely = hourly.asfreq('T').interpolate()

    # Close any figures that are still open before plotting.
    plt.close('all')
    ax = hourly.plot()
    minutely.plot(style='r')

    first_x = ax.lines[0].get_xdata()[0]
    second_x = ax.lines[1].get_xdata()[0]
    self.assert_(first_x == second_x)
def _convert_freq(series, freq, groupby, method):
    """Convert one series to ``freq`` by grouping or by ``asfreq``."""
    if groupby:
        # use a groupby to set the frequency
        return series.groupby(pd.Grouper(freq=freq)).agg(method)

    # ``asfreq`` only understands fill methods.  The default ``method`` of
    # ``set_freq`` is the aggregation name 'mean', which ``asfreq`` rejects
    # with a ValueError, so anything that is not a fill method is ignored and
    # the values found exactly at the new timestamps are taken unfilled.
    fill_methods = (None, 'pad', 'ffill', 'backfill', 'bfill', 'nearest')
    fill = method if isinstance(method, str) and method in fill_methods else None
    return series.asfreq(freq, method=fill)


def set_freq(price: pd.Series, dividend: pd.Series = None, freq: any = None,
             groupby: bool = True, method: any = 'mean',
             ffill: bool = True) -> tuple:
    """
    Set the frequency for the given price / dividend.

    Args:
        price: the price time-series to set the frequency for
        dividend: any dividend paid to set the frequency for (optional)
        freq: the target frequency; when None no conversion is performed
        groupby: whether to use groupby (aggregate each period with
            ``method``) or asfreq (pick the values at the new timestamps)
        method: with ``groupby=True`` the aggregation passed to ``agg``; with
            ``groupby=False`` it is forwarded to ``asfreq`` only when it is a
            fill method ('pad', 'ffill', 'backfill', 'bfill', 'nearest') and
            is ignored otherwise
        ffill: whether to forward fill missing values (i.e., NaN values)

    Returns:
        a tuple of:
            - the price after setting the frequency
            - the dividend after setting the frequency (None when no
              dividend was given)
    """
    if freq is not None:
        # adjust the frequency of the data
        price = _convert_freq(price, freq, groupby, method)
        if dividend is not None:
            dividend = _convert_freq(dividend, freq, groupby, method)

    if ffill:
        # forward fill missing values
        price = price.ffill()
        if dividend is not None:
            dividend = dividend.ffill()

    return price, dividend
def test_mixed_freq_alignment(self):
    """Hourly and minutely plots on one axes must start at the same x value."""
    import matplotlib.pyplot as plt

    hourly_index = date_range("2012-01-01 13:00", "2012-01-02", freq="H")
    hourly = Series(np.random.randn(12), index=hourly_index)
    minutely = hourly.asfreq("T").interpolate()

    # Close any figures that are still open before plotting.
    plt.close("all")
    ax = hourly.plot()
    minutely.plot(style="r")

    starts = [ax.lines[0].get_xdata()[0], ax.lines[1].get_xdata()[0]]
    self.assert_(starts[0] == starts[1])
def test_annual_upsample(self):
    """Annual period data upsampled with forward fill (frame and series)."""
    ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-DEC")

    # A DataFrame column must resample like the equivalent Series.
    df = DataFrame({"a": ts})
    frame_result = df.resample("D", fill_method="ffill")
    series_result = df["a"].resample("D", fill_method="ffill")
    assert_series_equal(frame_result["a"], series_result)

    # Annual -> monthly with forward fill.
    years = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=years)
    result = ts.resample("M", fill_method="ffill")
    months = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
    assert_series_equal(result, expected)
def test_add_series_with_period_index(self):
    """Adding period-indexed series aligns on labels and rejects mixed freqs."""
    years = pd.period_range('1/1/2000', '1/1/2010', freq='A')
    ts = Series(np.random.randn(len(years)), index=years)

    # Labels missing from the every-other-year operand become NaN.
    expected = ts + ts
    expected[1::2] = np.nan

    tm.assert_series_equal(ts + ts[::2], expected)
    # Same expectation when the operand is passed through ``_permute``.
    tm.assert_series_equal(ts + _permute(ts[::2]), expected)

    msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
    with pytest.raises(IncompatibleFrequency, match=msg):
        ts + ts.asfreq('D', how="end")
def test_annual_upsample(self, simple_period_range_series):
    """Annual period data upsampled with forward fill (frame and series)."""
    ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")

    # A DataFrame column must resample like the equivalent Series.
    df = DataFrame({"a": ts})
    frame_result = df.resample("D").ffill()
    series_result = df["a"].resample("D").ffill()
    tm.assert_series_equal(frame_result["a"], series_result)

    # Annual -> monthly with forward fill.
    years = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=years)
    result = ts.resample("M").ffill()
    months = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
    tm.assert_series_equal(result, expected)
def test_annual_upsample(self):
    """Annual period data upsampled with forward fill (frame and series)."""
    ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')

    # A DataFrame column must resample like the equivalent Series.
    df = DataFrame({'a': ts})
    frame_result = df.resample('D', fill_method='ffill')
    series_result = df['a'].resample('D', fill_method='ffill')
    assert_series_equal(frame_result['a'], series_result)

    # Annual -> monthly with forward fill.
    years = period_range('2000', '2003', freq='A-DEC')
    ts = Series([1, 2, 3, 4], index=years)
    result = ts.resample('M', fill_method='ffill')
    months = period_range('2000-01', '2003-12', freq='M')
    expected = ts.asfreq('M', how='start').reindex(months, method='ffill')
    assert_series_equal(result, expected)
def test_align_series(self):
    """Period-indexed series align on labels and reject mixed frequencies."""
    years = period_range('1/1/2000', '1/1/2010', freq='A')
    ts = Series(np.random.randn(len(years)), index=years)

    # Labels missing from the every-other-year operand become NaN.
    expected = ts + ts
    expected[1::2] = np.nan

    tm.assert_series_equal(ts + ts[::2], expected)
    # Same expectation when the operand is passed through ``_permute``.
    tm.assert_series_equal(ts + _permute(ts[::2]), expected)

    # it works!
    for join_kind in ('inner', 'outer', 'left', 'right'):
        ts.align(ts[::2], join=join_kind)

    msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
    with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
        ts + ts.asfreq('D', how="end")
def getFund(id): count = cur.execute('SELECT * FROM profit where id = ' + str(id)) results = cur.fetchall() valueYuan = [] valueYuanP = [] dateYuan = [] for r1 in results: name = r1[1] valueYuan.append(r1[2] / 100) dateYuan.append( datetime.datetime(int(str(r1[4]).split('-')[0]), int(str(r1[4]).split('-')[1]), int(str(r1[4]).split('-')[2]))) tsYuan = Series(valueYuan, index=dateYuan) tsfYuanm = tsYuan.asfreq('M', method='pad') print str(id) + ": " + str((len(tsfYuanm))) + "months ", dateP = [] for (k, d) in tsfYuanm.iteritems(): #print k, d valueYuanP.append(d) dateP.append(str(k).split(" ")[0]) valueYuanP = dayProfit(valueYuanP) print "start from: " + dateP[0] + " to :" + dateP[len(dateP) - 1] print name + ": ", period = dateP[0] + " to " + dateP[len(dateP) - 1] printResult(name, period, valueYuanP) tsfYuanmP = Series(valueYuanP, index=dateP) valueYuanPrice = [x + 1 for x in valueYuan] '''plot full period plt.plot(dateYuan,valueYuanPrice) plt.title(str(id)) fname=str(id)+'.png' plt.savefig(fname, dpi=75) #plt.show() ''' return tsfYuanmP
# Echo of ``index``, which is defined before this fragment.
index

# --- Converting a single Period between frequencies ---
# Annual period (fiscal year ending in December) to its first / last month.
p = pd.Period('2007', freq='A-DEC')
p.asfreq('M', how='start')
p.asfreq('M', how='end')

# Same conversion for a fiscal year ending in June.
p = pd.Period('2007', freq='A-JUN')
p.asfreq('M', 'start')
p.asfreq('M', 'end')

# Monthly period converted up to the A-JUN annual frequency.
p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')

# --- Converting a period-indexed Series ---
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts
ts.asfreq('M', how='start')
ts.asfreq('B', how='end')

# --- Quarterly periods (fiscal year ending in January) ---
p = pd.Period('2012Q4', freq='Q-JAN')
p
p.asfreq('D', 'start')
p.asfreq('D', 'end')

# Last business day of the quarter, minus one business day, at its first
# minute, plus 16 * 60 minutes (i.e. 16:00 on the second-to-last business day).
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()

# The same arithmetic applied to a whole quarterly index, then used as the
# timestamp index of ``ts``.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
ts
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
ts
# Add ts1, forward-filled onto ts2's index, to ts2 (both are defined before
# this fragment).
j = ts2 + ts1.reindex(ts2.index, method='ffill')
print(j)

# Quarterly series (Q-SEP) and annual series (A-DEC), both period-indexed.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
print(gdp)
infl = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))
print(infl)  # clearly not the same frequency as gdp
# Unlike timestamp-indexed series, two period-indexed series with different
# frequencies must be converted explicitly before they can be combined.
# Convert to the quarterly frequency.
infl_q = infl.asfreq('Q-SEP', how='end')
print(infl_q)
# Match gdp's index and forward-fill the missing values.
k = infl_q.reindex(gdp.index, method='ffill')
print(k)

# Time-of-day and "most recent" data selection.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')  # one trading session sampled by the minute
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])  # append the same session shifted by 1, 2 and 3 business days
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts.head())
print(ts.tail())
# Indexing with a datetime.time object extracts the values at that time of day.
print(ts[time(10, 0)])  # values at 10:00
# ts1 (defined before this fragment) upsampled to business days with forward fill.
print(ts1.resample('B').ffill())

# Irregularly spaced dates for a second series.
dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',
                          '2012-6-21', '2012-6-22', '2012-6-29'])
ts2 = Series(np.random.randn(6), index=dates)
print(ts2)
# Forward-fill ts1 onto ts2's index, then add the two series.
print(ts1.reindex(ts2.index, method='ffill'))
print(ts2 + ts1.reindex(ts2.index, method='ffill'))

# Quarterly series (Q-SEP) and annual series (A-DEC), both period-indexed.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
inf1 = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))
# Convert the annual series to Q-SEP, then forward-fill it onto gdp's index.
inf1_q = inf1.asfreq('Q-SEP', how='end')
print(inf1_q)
print(inf1_q.reindex(gdp.index, method='ffill'))

# Minute-level index for one session plus the same session shifted by 1, 2
# and 3 business days.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts)
# Selection by time of day: direct indexing, at_time and between_time.
print(ts[time(10, 0)])
print(ts.at_time(time(10, 0)))
print(ts.between_time(time(10, 0), time(10, 1)))

# Blank out a random subset of positions (every index of a random
# permutation from offset 700 onward, sorted) in a copy of ts.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
irr_ts[indexer] = np.nan
# #### # PeriodIndex 객체도 마찬가지로 다루자 # In[ ]: rng = pd.period_range('2006', '2009', freq='A-DEC'); rng ts = Series(np.random.randn(len(rng)), index=rng) ts # In[ ]: ts.asfreq ts.asfreq('M', how='start') # In[ ]: ts.asfreq('B', how='end') # ### 10.5.2 Quarterly period frequencies # * 회계 연도의 끝에 따라 의미가 달라짐 # * 12 가지의 분기 빈도: Q-JAN ~ Q-DEC # - 4/4분기의 마지막 달이 Q- 다음에 오는 달 # In[ ]:
def _freq_to_period(x: pd.Series, freq: Frequency = Frequency.YEAR):
    """
    Forward-fill ``x`` onto a regular calendar and return it together with the
    number of data points that make up one cycle of length ``freq``.

    ``freq`` is the length of time after which the cycles of ``x`` repeat
    (e.g. a yearly retail sales cycle).  The frequency of the data itself is
    inferred from the index:

    ======================  ==========  ===================================
    inferred data freq      resampled   points per YEAR / QUARTER / MONTH
    ======================  ==========  ===================================
    daily or not inferable  'D'         365 / 91 / 30 (otherwise 7)
    business day            'D'         365 / 91 / 30 (otherwise 'B' and 5)
    weekly                  'W'         52 / 13 / 4 (otherwise an error)
    monthly                 'M'         12 / 3 (otherwise an error)
    ======================  ==========  ===================================

    Any other inferred frequency returns ``x`` unchanged with the period
    reported by statsmodels.

    :raises MqValueError: if the index is not a DatetimeIndex, or if ``freq``
        is not compatible with weekly / monthly data.
    """
    if not isinstance(x.index, pd.DatetimeIndex):
        raise MqValueError("Series must have a pandas.DateTimeIndex.")

    pfreq = getattr(getattr(x, 'index', None), 'inferred_freq', None)
    try:
        period = statsmodels.tsa.seasonal.freq_to_period(pfreq)
    except (ValueError, AttributeError):
        period = None

    def points_for(table):
        # Count paired with the first frequency equal to ``freq``, else None.
        return next((count for candidate, count in table if freq == candidate), None)

    def incompatible():
        return MqValueError(
            f'Frequency {freq.value} not compatible with series with frequency {pfreq}.'
        )

    calendar_days = ((Frequency.YEAR, 365), (Frequency.QUARTER, 91), (Frequency.MONTH, 30))

    if period in (7, None):  # daily
        daily = x.asfreq('D', method='ffill')
        count = points_for(calendar_days)
        return daily, (7 if count is None else count)

    if period == 5:  # business day
        count = points_for(calendar_days)
        if count is None:  # freq == Frequency.WEEKLY
            return x.asfreq('B', method='ffill'), 5
        return x.asfreq('D', method='ffill'), count

    if period == 52:  # weekly frequency
        weekly = x.asfreq('W', method='ffill')
        count = points_for(((Frequency.YEAR, period), (Frequency.QUARTER, 13), (Frequency.MONTH, 4)))
        if count is None:
            raise incompatible()
        return weekly, count

    if period == 12:  # monthly frequency
        monthly = x.asfreq('M', method='ffill')
        count = points_for(((Frequency.YEAR, period), (Frequency.QUARTER, 3)))
        if count is None:
            raise incompatible()
        return monthly, count

    return x, period
def main(st, et):
    """Collect AWS spot price history and forward it to OpenTSDB.

    For every EC2 region the DescribeSpotPriceHistory operation is called
    page by page; each record is sent through ``common.otsdb_send`` as
    ``price_spot`` and ``price``.  Afterwards the collected points of each
    zone/product/instance-type are forward-filled to one-minute frequency
    and sent again.

    st -- start of the window; when falsy, derived from
          ``common.DEFAULT_LOOKBACK_MINUTES`` relative to the current UTC time
    et -- end of the window; when falsy, the current UTC time

    NOTE(review): the block nesting below was reconstructed from a listing
    without indentation -- confirm it against the original layout.
    """
    if st:
        start_time = st
    else:
        start_time = arrow.utcnow().replace(minutes=common.DEFAULT_LOOKBACK_MINUTES)
    if et:
        end_time = et
    else:
        end_time = arrow.utcnow()
    # Only all_regions is written below; the other three sets and
    # local_timeseries are never used in this function.
    all_regions = set()
    all_product_descriptions = set()
    all_instance_types = set()
    all_instance_zones = set()
    session = botocore.session.get_session()
    ec2 = session.get_service('ec2')
    operation = ec2.get_operation('DescribeSpotPriceHistory')
    local_timeseries = {}
    # vals / tss: region -> zone -> product -> instance type -> list of
    # prices / timestamps, pre-seeded from the on-demand price tables.
    vals = {}
    tss = {}
    print 'Preparing...'
    for region in AWS_ON_DEMAND_PRICES:
        reg_key = region.replace('-','_')
        # NOTE(review): the membership test uses ``region`` while the entry
        # is stored under ``reg_key`` -- confirm which key was intended.
        if region not in vals:
            vals[reg_key] = {}
            tss[reg_key] = {}
        for zone in AWS_REGIONS_TO_ZONES[region]:
            # print 'Zone: %s' % zone
            if zone not in vals[reg_key]:
                vals[reg_key][zone] = {}
                tss[reg_key][zone] = {}
            for product in AWS_ON_DEMAND_PRICES[region]:
                # print 'Product: %s' % product
                if not AWS_ON_DEMAND_PRICES[region][product]:
                    print "WARNING: Empty %s:%s" % (region, product)
                    continue
                if product not in vals[reg_key][zone]:
                    vals[reg_key][zone][product] = {}
                    tss[reg_key][zone][product] = {}
                # NOTE(review): this loop reads common.AWS_ON_DEMAND_PRICES
                # while the loops above read the unqualified name -- presumably
                # the same table; verify.
                for inst_type in common.AWS_ON_DEMAND_PRICES[region][product]:
                    # print "%s/%s/%s/%s" % (reg_key, zone, product, inst_type)
                    vals[reg_key][zone][product][inst_type] = []
                    tss[reg_key][zone][product][inst_type] = []
    #sys.exit(1)
    for region in ec2.region_names:
        all_regions.add(region)
        cnt = 0
        next_token = None
        print 'Collecting spot prices from region: %s for %s to %s' % (region, start_time.format(_FMT), end_time.format(_FMT))
        sys.stdout.flush()
        # if region != 'us-east-1':
        #continue
        # Page through the history until no NextToken is returned.
        while True:
            endpoint = ec2.get_endpoint(region)
            if next_token:
                response, data = operation.call(
                    endpoint,
                    start_time=start_time.format(_FMT),
                    end_time=end_time.format(_FMT),
                    next_token=next_token,
                )
            else:
                # NOTE(review): the first page is requested without
                # end_time, unlike the follow-up pages -- confirm intent.
                response, data = operation.call(
                    endpoint,
                    start_time=start_time.format(_FMT),
                )
            next_token = data.get('NextToken')
            spot_data = data.get('SpotPriceHistory', [])
            first_entry_in_batch = True
            sys.stdout.flush()
            for d in spot_data:
                ts = common.ts_from_aws(d)
                if first_entry_in_batch:
                    print "Fetched %s records starting with %s" % (len(spot_data), d['Timestamp'])
                    first_entry_in_batch = False
                # {u'Timestamp': '2014-04-10T23:49:21.000Z', u'ProductDescription': 'Linux/UNIX (Amazon VPC)', u'InstanceType': 'hi1.4xlarge', u'SpotPrice': '0.128300', u'AvailabilityZone': 'us-east-1b'}
                # Normalise every identifier so it contains only
                # underscores as separators.
                reg_key = region.replace('-','_')
                d['Region'] = reg_key
                d['InstanceTypeNorm'] = d['InstanceType'].replace('.','_')
                value = d['SpotPrice']
                zone = d['AvailabilityZone'].replace('-','_')
                product = d['ProductDescription'].replace('-','_').replace('(','').replace(')','_').replace(' ','_').replace('/','_')
                if product.endswith('_'):
                    product=product[:-1]
                inst_type = d['InstanceTypeNorm'].replace('-','_')
                tags = {
                    'cloud' : 'aws',
                    'region' : reg_key,
                    'zone' : zone,
                    'product' : product,
                    'inst_type' : inst_type,
                    'units' : 'USD'
                }
                try:
                    vals[reg_key][zone][product][inst_type].append(value)
                    tss[reg_key][zone][product][inst_type].append(ts)
                except KeyError:
                    # Combination was not pre-seeded in the preparation loop.
                    print "No on-demand info for %s/%s/%s/%s" % (reg_key,zone,product,inst_type)
                common.otsdb_send('price_spot', value, tags, ts, False)
                # The same dict is reused for the second metric with an
                # extra price_type tag.
                tags['price_type'] = 'spot'
                common.otsdb_send('price', value, tags, ts, False)
                cnt += 1
            if not next_token:
                break
        print "Found %s price points" % cnt
        # NOTE(review): ``reg_key`` here is whatever was assigned last --
        # inside the record loop, or in the preparation loop when this
        # region returned no records; confirm that is intended.
        for zone in tss[reg_key]:
            for product in tss[reg_key][zone]:
                for inst_type in tss[reg_key][zone][product]:
                    if not tss[reg_key][zone][product][inst_type]:
                        print "No spot info for %s/%s/%s/%s." % (reg_key, zone, product, inst_type)
                        continue
                    print "%s/%s/%s/%s" % (reg_key, zone, product, inst_type)
                    tags = {
                        'cloud' : 'aws',
                        'region' : reg_key,
                        'zone' : zone,
                        'product' : product,
                        'inst_type' : inst_type,
                        'units' : 'USD'
                    }
                    tss_ts = tss[reg_key][zone][product][inst_type]
                    # NOTE(review): the timestamps are sorted in place but
                    # the matching price list is not reordered -- confirm
                    # records arrive in an order that keeps them paired.
                    tss_ts.sort()
                    tss_dt = to_datetime(tss_ts, unit='s')
                    s_data = vals[reg_key][zone][product][inst_type]
                    s1 = Series(s_data, tss_dt)
                    # print "Creating Series(%s, %s) from %s; length: %s" % (s_data, tss_dt, tss_ts, len(s1))
                    if len(s1) > 1:
                        # We already took care of 1-length (no fill)
                        s2 = s1.asfreq('1Min', method='ffill')
                        # print "Sparse series:\n%s\n" % s1
                        # print "Filled series:\n%s\n" % s2
                        # print "Sparse: %s, filled: %s" % (len(s1), len(s2))
                        for (dt,value) in s2.iteritems():
                            ts = arrow.Arrow.fromdatetime(dt).timestamp
                            common.otsdb_send('price_spot', value, tags, ts, False)
                            tags['price_type'] = 'spot'
                            common.otsdb_send('price', value, tags, ts, False)
                    sys.stdout.flush()
# Irregularly spaced dates for a second series (Python 2 print statements).
dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',
                          '2012-6-21', '2012-6-22', '2012-6-29'])
ts2 = Series(np.random.randn(6), index=dates)
print ts2
# Add the most recent (forward-filled) values of ts1 to ts2, keeping ts2's index.
# First forward-fill ts1 onto ts2's index (ts1 is defined before this fragment).
print ts1.reindex(ts2.index, method='ffill')
# Then add the two together.
print ts2 + ts1.reindex(ts2.index, method='ffill')

# Using Period to represent time spans.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
infl = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))
print gdp
print infl
# Unlike timestamp-indexed series, the annual series has to be converted
# explicitly; asfreq('Q-SEP') puts it on gdp's quarterly frequency.
infl_q = infl.asfreq('Q-SEP', how='end')
print infl_q
# Reindex onto gdp's index with forward fill.
print infl_q.reindex(gdp.index, method='ffill')
# 11.5.1区间频率转换 p = pd.Period('2007', freq='A-DEC') print(p) print(p.asfreq('M', how='start')) print(p.asfreq('M', how='end')) p = pd.Period('2007', freq='A-JUN') print(p) print(p.asfreq('M', 'start')) print(p.asfreq('M', 'end')) p = pd.Period('Aug-2007', 'M') print(p.asfreq('A-JUN')) rng = pd.period_range('2006', '2009', freq='A-DEC') ts = Series(np.random.randn(len(rng)), index=rng) print(ts) print(ts.asfreq('M', how='start')) print(ts.asfreq('B', how='end')) # 11.5.2季度区间频率 p = pd.Period('2012Q4', freq='Q-JAN') print(p) print(p.asfreq('D', 'start')) print(p.asfreq('D', 'end')) p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60 print(p4pm) rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN') ts = Series(np.arange(len(rng)), index=rng) print(ts)
p.asfreq('M', how='end') # In[28]: p = pd.Period('2007-08', 'M') p.asfreq('A-JUN') # In[35]: rng = pd.date_range('2007', '2010', freq='A-DEC') ts = Series(np.random.randn(len(rng)), index=rng) ts # In[36]: ts.asfreq('M', how='start') # In[37]: p = pd.Period('2012Q4', freq='Q-JAN') p # In[38]: p.asfreq('D', 'start') # In[39]: p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60 p4pm
# print(ts1) dates = pd.DatetimeIndex([ '2012-6-12', '2012-6-17', '2012-6-18', '2012-6-21', '2012-6-22', '2012-6-29' ]) ts2 = Series(np.random.randn(6), index=dates) # print(ts2) # print(ts1.reindex(ts2.index).ffill()) # print(ts2 + ts1.reindex(ts2.index, method='ffill')) gdp = Series([1.78, 1.95, 2.08, 2.01, 2.15, 2.31, 2.46], index=pd.period_range('1984Q2', periods=7, freq='Q-SEP')) inf1 = Series([0.025, 0.045, 0.037, 0.04], index=pd.period_range('1982', periods=4, freq='A-DEC')) inf1_q = inf1.asfreq('Q-SEP', how='end') # print(gdp) # print(inf1) # print(inf1_q) # # print(inf1_q.reindex(gdp.index, method='ffill')) rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T') rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)]) # print(rng) ts = Series(np.arange(len(rng), dtype=float), index=rng) # print(ts) #
# ``index`` is defined before this fragment.
print(index)

# Annual period (fiscal year ending in December) to its first / last month.
p = pd.Period('2007', freq='A-DEC')
print(p.asfreq('M', how='start'))
print(p.asfreq('M', how='end'))

# Same conversion for a fiscal year ending in June.
p = pd.Period('2007', freq='A-JUN')
print(p.asfreq('M', how='start'))
print(p.asfreq('M', how='end'))

# Monthly period converted up to the A-JUN annual frequency.
p = pd.Period('2007-08', 'M')
print(p.asfreq('A-JUN'))

# Converting a period-indexed Series to monthly, anchored at start / end.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts)
print(ts.asfreq('M', how='start'))
print(ts.asfreq('M', how='end'))

# Quarterly period with the fiscal year ending in January.
p = pd.Period('2014Q4', freq='Q-JAN')
print(p)
print(p.asfreq('D', 'start'))
print(p.asfreq('D', 'end'))
# Last business day of the quarter, minus one business day, at its first
# minute, plus 16 * 60 minutes (i.e. 16:00 on the second-to-last business day).
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
print(p4pm)
print(p4pm.to_timestamp())

# The same arithmetic applied to a whole quarterly index.  The resulting
# timestamps are stored in ``ts_index``; they are not assigned to ``ts``.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts_index = new_rng.to_timestamp()
ts2 = ts1.resample('B').ffill() # print(ts2) # # # # 如果要将ts1中"最当前"的值(即前向填充)加到ts2上.一个办法是将两者重采样为规整频率后再相加,但是如果想要维持ts2中的日期索引,则reindex回事一种更好的解决方案 # print(ts1.reindex(ts2.index).ffill()) # print(ts2+ts1.reindex(ts2.index).ffill()) # # # 使用Period # # # # period(表示时间区间)提供了另一种处理不同频率时间序列的办法,尤其是那些有着特殊规范的一年或季度为频率的金融或经济序列 gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46], index=pd.period_range('1984Q2', periods=7, freq='Q-SEP')) infl = Series([0.025, 0.045, 0.037, 0.04], index=pd.period_range('1982', periods=4, freq='A-DEC')) # print(gdp,'\n') # print(infl) # # # # 跟timestamp的时间序列不同,由period索引的两个不同频率的时间序列之间的运算必须进行显式转换 infl_q = infl.asfreq('Q-SEP', how='E') # print(infl_q) # # # # 这个时间序列就可以被重新索引了(使用前向填充以匹配GDP) # print(infl_q.reindex(gdp.index).ffill()) # # # 时间和"最当前"数据选取 # # # # 生成一个交易日内的日期范围和时间序列 rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T') # # # # 生成5天的时间点(9:30~15:59之间的值) rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)]) ts = Series(np.arange(len(rng), dtype=float), index=rng) # print(ts) # # # # 利用python的datetime.time对象进行索引即可抽取这些时间点上的值 # print(ts[time(10, 0)]) # # # # 实际上,该操作用到了实例方法at_time(各时间序列以及类似的DF对象都有) # print(ts.at_time(time(10,0)))