def splitData2Slice(self, winIn=120, winOut=3, stride=1):
    """Slice ``self.dataTrain`` into rolling input/target windows.

    Fills ``self.dfX`` with input windows spanning ``winIn`` months and
    ``self.dfY`` with the ``winOut`` months that follow each input
    window, advancing the window start by ``stride`` months per step.

    Parameters
    ----------
    winIn : int, default 120
        Input window width, in months.
    winOut : int, default 3
        Target window width, in months.
    stride : int, default 1
        Number of months to advance between consecutive windows.
    """
    # Offsets for the window ends (the day=-1 makes each window end on
    # the day before the month boundary) and for the stride.
    in_off = offsets.DateOffset(months=winIn, days=-1)
    out_off = offsets.DateOffset(months=winOut, days=-1)
    step = offsets.DateOffset(months=stride)

    # Training period boundaries as datetimes.
    cursor = pd.to_datetime(self.sTrain)
    limit = pd.to_datetime(self.eTrain)

    self.dfX, self.dfY = [], []
    while True:
        x_end = cursor + in_off
        y_end = x_end + out_off
        if y_end > limit:
            break
        # NOTE(review): label slicing is inclusive at both ends, so the
        # last X timestamp coincides with the first Y timestamp —
        # confirm this overlap is intended.
        self.dfX.append(self.dataTrain[cursor:x_end])
        self.dfY.append(self.dataTrain[x_end:y_end])
        cursor = cursor + step
def splitData2Slice(self, winIn=120, winOut=3, stride=1):
    """Slice ``self.dataTrain`` into rolling input/target windows.

    Fills ``self.dfX`` with input windows spanning ``winIn`` months and
    ``self.dfY`` with the ``winOut`` months that follow each input
    window, advancing ``stride`` months per step.  Also records the end
    timestamp of each input window.

    Returns
    -------
    tuple
        ``(self.dfX, self.dfY, endDTList)`` where ``endDTList[i]`` is
        the (inclusive) end timestamp of ``self.dfX[i]``.
    """
    # Offsets for the window ends (day=-1 ends each window just before
    # the month boundary) and for the stride.
    in_off = offsets.DateOffset(months=winIn, days=-1)
    out_off = offsets.DateOffset(months=winOut, days=-1)
    step = offsets.DateOffset(months=stride)

    # Training period boundaries as datetimes.
    cursor = pd.to_datetime(self.sTrain)
    limit = pd.to_datetime(self.eTrain)

    self.dfX, self.dfY = [], []
    window_ends = []  # Saito temporarily added (7/9)
    while True:
        x_end = cursor + in_off
        y_end = x_end + out_off
        if y_end > limit:
            break
        window_ends.append(x_end)  # Saito temporarily added (7/9)
        # NOTE(review): label slicing is inclusive at both ends, so the
        # last X timestamp coincides with the first Y timestamp —
        # confirm this overlap is intended.
        self.dfX.append(self.dataTrain[cursor:x_end])
        self.dfY.append(self.dataTrain[x_end:y_end])
        cursor = cursor + step
    return self.dfX, self.dfY, window_ends,  # Saito temporarily added (7/9)
def test_shift_months(self):
    """shift_months on the raw int64 values must agree with DateOffset math."""
    stamps = DatetimeIndex([
        Timestamp('2000-01-05 00:15:00'),
        Timestamp('2000-01-31 00:23:00'),
        Timestamp('2000-01-01'),
        Timestamp('2000-02-29'),   # leap-day edge case
        Timestamp('2000-12-31'),   # year-end edge case
    ])
    for yr in (-1, 0, 1):
        for mo in (-2, 0, 2):
            shifted = DatetimeIndex(
                tslib.shift_months(stamps.asi8, yr * 12 + mo))
            step = offsets.DateOffset(years=yr, months=mo)
            reference = DatetimeIndex([ts + step for ts in stamps])
            tm.assert_index_equal(shifted, reference)
def constrain_horizon(
    r,
    strict=False,
    cust=None,
    years=0,
    quarters=0,
    months=0,
    days=0,
    weeks=0,
    year=None,
    month=None,
    day=None,
):
    """Constrain a Series/DataFrame to a trailing lookback window.

    The window always ends at ``r.index[-1]``; the start is derived from
    the relative/absolute parameters (dateutil.relativedelta semantics —
    see dateutil.readthedocs.io/en/stable/relativedelta.html).

    Parameters
    ----------
    r : DataFrame or Series
        Target pandas object with a DatetimeIndex.
    strict : bool, default False
        If True, raise when the implied start pre-dates ``r.index[0]``;
        if False the slice simply clips to the available range.
    cust : str, optional
        Free-form lookback spec such as ``'5y'``, ``'18m'`` or
        ``'two years ago'``.  Mutually exclusive with nonzero values of
        the other parameters.
    years, quarters, months, days, weeks : int, default 0
        Relative amounts subtracted from the end date.  ``quarters`` is
        a custom extension (not a relativedelta parameter) worth 3
        months each.
    year, month, day : int, optional
        Absolute values; these REPLACE the corresponding field of the
        end date rather than being subtracted from it.
    """
    number_words = {
        "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
        "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
        "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
        "fourteen": 14, "fifteen": 15, "sixteen": 16, "seventeen": 17,
        "eighteen": 18, "nineteen": 19, "twenty": 20,
        "twenty four": 24, "thirty six": 36,
    }

    # `cust` competes with explicit relative/absolute parameters.
    if cust is not None and any(
        (years, quarters, months, days, weeks, year, month, day)
    ):
        raise ValueError("Cannot specify competing (nonzero) values for both"
                         " `cust` and other parameters.")

    if cust is not None:
        cust = cust.lower()
        if cust.endswith("y"):
            # e.g. '5y' -> 5 years
            years = int(re.search(r"\d+", cust).group(0))
        elif cust.endswith("m"):
            # e.g. '18m' -> 18 months
            months = int(re.search(r"\d+", cust).group(0))
        elif cust.endswith(("years ago", "year ago", "year", "years")):
            # e.g. 'two years ago' -> look up the spelled-out number
            years = number_words[cust[: cust.find(" year")].replace("-", "")]
        elif cust.endswith(("months ago", "month ago", "month", "months")):
            months = number_words[cust[: cust.find(" month")].replace("-", "")]
        else:
            raise ValueError("`cust` not recognized.")

    # Fold quarters into months so a single month-based offset suffices.
    months += quarters * 3

    # The start date is computed relative to the final index label.
    end = r.index[-1]

    # Finance convention: stepping back from a month-end lands on the
    # prior month-end (6/30 minus 3 months -> 3/31, not 3/30 as plain
    # dateutil.relativedelta would give).
    if end.is_month_end and days == 0 and weeks == 0:
        if years != 0:
            years *= 12
            months += years
        start = end - offsets.MonthBegin(months)
    else:
        start = end - offsets.DateOffset(
            years=years,
            months=months,
            days=days - 1,
            weeks=weeks,
            year=year,
            month=month,
            day=day,
        )

    if strict and start < r.index[0]:
        raise ValueError("`start` pre-dates first element of the Index, %s"
                         % r.index[0])
    return r[start:end]
# Combine the two gauge records: take `american` and fill its gaps
# from `columbia`.
riverFlows = american.combine_first(columbia)

# The two records cover different periods, so keep only the overlap.
# First month where the American flow is missing ...
idx_american = riverFlows.index[riverFlows['american_flow'].isna()].min()
# ... and last month where the Columbia flow is missing.
idx_columbia = riverFlows.index[riverFlows['columbia_flow'].isna()].max()

# Truncate to the months strictly inside both records.
riverFlows = riverFlows.truncate(
    before=idx_columbia + ofst.DateOffset(months=1),
    after=idx_american - ofst.DateOffset(months=1))

# Write the truncated dataset to a file.
# FIX: `ignore_index=True` is not a to_csv argument (it belongs to
# concat/append) and raises TypeError; `index=False` is the to_csv
# spelling of the same intent.  NOTE(review): excluding the index drops
# the dates from the CSV — confirm that is what was wanted.
with open(data_folder + 'combined_flow.csv', 'w') as o:
    o.write(riverFlows.to_csv(index=False))

# The combined frame keeps a DatetimeIndex.
print('\nIndex of riverFlows')
print(riverFlows.index)

# Partial-string selection on the DatetimeIndex (inclusive endpoints).
print('\ncsv_read[\'1933\':\'1934-06\']')
print(riverFlows['1933':'1934-06'])

# shifting the data
def test_dateoffset_misc():
    """Smoke-test DateOffset: freqstr access works; int equality is False."""
    combined = offsets.DateOffset(months=2, days=4)
    # Accessing freqstr must not raise.
    combined.freqstr
    # A DateOffset never compares equal to a plain integer.
    two_months = offsets.DateOffset(months=2)
    assert not two_months == 2
def update_daily_data(self, stockslist=None, date=None, start_date=None,
                      end_date=None, include_today=False):
    """Refresh the locally cached daily market-data panels.

    Fetches incremental columns for each indicator via
    ``self.update_ori_data`` and persists them with ``self.close_file``,
    then rebuilds derived panels: the adjusted close (close * adjfactor),
    the listing-day matrix, and — depending on ``self.updatefreq`` —
    monthly extras ('M') or weekly profit-growth / N-month price-change
    panels ('w').

    All parameters are forwarded unchanged to ``self.update_ori_data``;
    their exact semantics live there (NOTE(review): confirm against that
    method — e.g. whether `date` overrides the start/end range).
    """
    # Indicators refreshed in the default (non-weekly) mode.
    inds_to_update = ('pct_chg', 'close', 'adjfactor', 'maxupordown',
                      'trade_status', 'turn', 'amt', 'dealnum',
                      'mkt_cap_ard', 'mkt_cap_float_d')
    # Larger indicator set used when this instance updates weekly.
    weekly_inds_to_update = ('close', 'adjfactor', 'maxupordown', 'pct_chg',
                             'trade_status', 'turn', 'dividendyield2_d',
                             'mkt_cap_float_d', 'pb_lf_d', 'pcf_ncf_ttm_d',
                             'pcf_ocf_ttm_d', 'pe_ttm_d', 'profit_ttm_d',
                             'ps_ttm_d', 'sec_name1_d',
                             'val_pe_deducted_ttm_d', 'industry_citic_d',
                             'industry_citic_level2_d')
    if self.updatefreq == 'w':
        inds_to_update = weekly_inds_to_update
    for qname in inds_to_update:
        new_cols, new_data = self.update_ori_data(qname, 'd', stockslist,
                                                  date, start_date,
                                                  end_date, include_today)
        if new_cols:
            # Latest date among the freshly fetched columns.
            new_date = sorted(new_cols)[-1]
            if qname == 'trade_status':
                # Map the raw status string to a 0/1 tradable flag
                # (1 only for the literal status '交易').
                new_data.loc[:, new_cols] = new_data.loc[:, new_cols].\
                    applymap(lambda x: 0 if x != '交易' else 1)
            elif qname == 'pct_chg' or qname == 'turn':
                # Convert percentage points to fractions.
                new_data.loc[:, new_cols] = new_data.loc[:, new_cols] / 100
            self.close_file(new_data, qname)
            print("\"{}\" data updated to date {}.".format(
                qname, str(new_date)[:10]))
        else:
            print(f"\"{qname}\"'s data don't need to be updated.")
    # Rebuild the adjusted close as close * adjfactor (aligned first).
    close, adjfactor = self._align_element(self.close, self.adjfactor)
    hfq_close = close * adjfactor
    self.close_file(hfq_close, 'hfq_close')
    print("\'hfq_close\' updated.")
    self.get_listday_matrix()
    print("'listday matrix' updated.")
    if self.updatefreq == 'M':
        # Monthly mode: next-month pct change, amount per deal, and
        # month-end/calendar alignment.
        self._update_pct_chg_nm(hfq_close)
        amt, dealnum = self._align_element(self.amt, self.dealnum)
        amt_per_deal = amt / dealnum
        self.close_file(amt_per_deal, 'amt_per_deal')
        print("'amt_per_deal' updated")
        self._align_month_end_to_calendar()
    if self.updatefreq == 'w':
        datelist = hfq_close.columns.tolist()
        # Walk back one day at a time from now to the most recent Thursday.
        lastThursday = toffsets.datetime.now()
        daydelta = toffsets.DateOffset(n=1)
        while lastThursday.weekday() != calendar.THURSDAY:
            lastThursday -= daydelta
        # Extend the trailing-profit YoY growth panel up to last Thursday;
        # [1:] skips the already-present last column.
        profit_ttm_G_d = self.profit_ttm_G_d
        update_dates = hfq_close.loc[:, profit_ttm_G_d.
                                     columns[-1]:lastThursday].columns[1:]
        yoy = pd.DataFrame()
        for date in update_dates:
            # Same calendar day one year earlier, snapped to an actual
            # trading date via _get_date.
            lstdate = toffsets.datetime(date.year - 1, date.month, date.day)
            lstdate = self._get_date(lstdate, 0, datelist)
            yoy[date] = self.profit_ttm_d[date] / self.profit_ttm_d[
                lstdate] - 1
        profit_ttm_G_d = pd.concat([profit_ttm_G_d, yoy], axis=1)
        profit_ttm_G_d = profit_ttm_G_d[
            profit_ttm_G_d.columns.sort_values()]
        self.close_file(profit_ttm_G_d, 'profit_ttm_G_d')
        print("'profit_ttm_G_d' updated.")
        # Extend the 1/3/6/12-month price-change panels the same way.
        for offset in [1, 3, 6, 12]:
            pctchg_d = getattr(
                self, f'pctchg_{offset}M_d',
            )
            res = pd.DataFrame()
            update_dates = hfq_close.loc[:, pctchg_d.columns[-1]:
                                         lastThursday].columns[1:]
            for date in update_dates:
                # Comparison date `offset` months earlier; the day is
                # clamped to that month's length (monthrange).
                if offset == 12:
                    lstyear = date.year - 1
                    lstmonth = date.month
                else:
                    if date.month - offset > 0:
                        lstyear = date.year
                        lstmonth = date.month - offset
                    else:
                        lstyear = date.year - 1
                        lstmonth = date.month - offset + 12
                lstday = min(date.day,
                             calendar.monthrange(lstyear, lstmonth)[1])
                lstdate = toffsets.datetime(lstyear, lstmonth, lstday)
                lstdate = self._get_date(lstdate, 0, datelist)
                res[date] = hfq_close[date] / hfq_close[lstdate] - 1
            pctchg_d = pd.concat([pctchg_d, res], axis=1)
            pctchg_d = pctchg_d[pctchg_d.columns.sort_values()]
            self.close_file(pctchg_d, f'pctchg_{offset}M_d')
            print(f"'pctchg_{offset}M_d' updated.")
# One DataFrame slice per October-to-June "season" (label slicing on a
# DatetimeIndex is inclusive at both endpoints).
s0304 = df.loc['20031009':'20040610']
s0405 = df.loc['20041009':'20050610']
s0506 = df.loc['20051009':'20060610']
s0607 = df.loc['20061009':'20070610']
s0708 = df.loc['20071009':'20080610']
s0809 = df.loc['20081009':'20090610']
s0910 = df.loc['20091009':'20100610']
s1011 = df.loc['20101009':'20110610']
s1112 = df.loc['20111009':'20120610']
s1213 = df.loc['20121009':'20130610']

#u, indices = np.unique(s1011.index.month, return_index=True)
#np.array([1, 24, 54, 85, 116, 144, 175, 205, 236]) - 1

# In[ ]:

# Season start/end dates expressed as day-of-year after shifting forward
# by 84 days; a bare positional DateOffset(84) advances by 84 calendar
# days.  NOTE(review): the shift presumably keeps the October start
# dates from wrapping past Dec 31 so start/end day-of-year values stay
# comparable across seasons — confirm intent.
sos0304 = (pd.Timestamp('2003-10-22') + offsets.DateOffset(84)).dayofyear
eos0304 = (pd.Timestamp('2004-05-26') + offsets.DateOffset(84)).dayofyear
sos0405 = (pd.Timestamp('2004-10-09') + offsets.DateOffset(84)).dayofyear
eos0405 = (pd.Timestamp('2005-05-28') + offsets.DateOffset(84)).dayofyear
sos0506 = (pd.Timestamp('2005-10-18') + offsets.DateOffset(84)).dayofyear
eos0506 = (pd.Timestamp('2006-05-30') + offsets.DateOffset(84)).dayofyear
sos0607 = (pd.Timestamp('2006-10-18') + offsets.DateOffset(84)).dayofyear
eos0607 = (pd.Timestamp('2007-05-30') + offsets.DateOffset(84)).dayofyear
sos0708 = (pd.Timestamp('2007-10-20') + offsets.DateOffset(84)).dayofyear
eos0708 = (pd.Timestamp('2008-06-02') + offsets.DateOffset(84)).dayofyear
sos0809 = (pd.Timestamp('2008-10-21') + offsets.DateOffset(84)).dayofyear
# NOTE(review): the first line below is the tail of a statement that
# begins before this chunk (presumably `ts = pd.Series(...,`); kept
# as-is because the opening is not visible here.
index=pd.date_range('20170101', periods=100000, freq='T'))
ts

# In[34]:

# Partial-string label slicing on a DatetimeIndex (endpoints inclusive).
ts['2017-09-19 21:00:59':'2017-09-19 21:30:00']

# ## 5. Time-series arithmetic

# In[37]:

from pandas.tseries import offsets

dt = pd.Timestamp('2017-9-19 21:18:00')
# NOTE(review): singular `hour=3` REPLACES the hour field (result has
# hour == 3), whereas plural `hours=3` would ADD three hours — confirm
# which behavior was intended here.
dt + offsets.DateOffset(months=1, days=2, hour=3)

# ## 6. Other methods

# ## 6.1 Shifting

# In[39]:

ts = pd.DataFrame(np.random.randn(7, 2),
                  columns=['Value1', 'Value2'],
                  index=pd.date_range('20170101', periods=7, freq='T'))
ts

# In[40]:

# Shift the values forward by 3 rows; the index stays fixed, so the
# first 3 rows become NaN.
ts.shift(3)