def splitData2Slice(self, winIn=120, winOut=3, stride=1):
        """Cut the training data into sliding input/output windows.

        The windows are stored on the instance as two parallel lists:
        ``self.dfX`` (input windows) and ``self.dfY`` (output windows).
        Nothing is returned.

        Parameters
        ----------
        winIn : int, default 120
            Length of the input window, in months.
        winOut : int, default 3
            Length of the output window, in months.
        stride : int, default 1
            Number of months the window start advances per step.
        """
        # Spans of the two windows (N months minus one day) and the step size.
        inputSpan = offsets.DateOffset(months=winIn, days=-1)
        outputSpan = offsets.DateOffset(months=winOut, days=-1)
        step = offsets.DateOffset(months=stride)

        # Start and end of the training period as datetimes.
        trainStart = pd.to_datetime(self.sTrain)
        trainEnd = pd.to_datetime(self.eTrain)

        # One slice of self.dataTrain per window position.
        inputs = self.dfX = []
        targets = self.dfY = []

        windowStart = trainStart
        while True:
            inputEnd = windowStart + inputSpan
            targetEnd = inputEnd + outputSpan

            # Stop once the output window would run past the training period.
            if not (targetEnd <= trainEnd):
                break

            # Input window: inputSpan starting at the current position.
            inputs.append(self.dataTrain[windowStart:inputEnd])

            # Output window: outputSpan starting where the input window ends.
            targets.append(self.dataTrain[inputEnd:targetEnd])

            # Advance the current position by one stride.
            windowStart = windowStart + step
Beispiel #2
0
    def splitData2Slice(self, winIn=120, winOut=3, stride=1):
        """Slice the training data into sliding input/output windows.

        Parameters
        ----------
        winIn : int, default 120
            Length of the input window, in months.
        winOut : int, default 3
            Length of the output window, in months.
        stride : int, default 1
            Number of months the window start advances per step.

        Returns
        -------
        tuple
            ``(self.dfX, self.dfY, endDTList)``: the list of input windows,
            the list of output windows, and the end datetime of each input
            window. The first two lists are also stored on the instance.
        """
        # Window spans (N months minus one day) and the stride.
        spanIn = offsets.DateOffset(months=winIn, days=-1)
        spanOut = offsets.DateOffset(months=winOut, days=-1)
        spanStride = offsets.DateOffset(months=stride)

        # Start and end of the training period as datetimes.
        firstDT = pd.to_datetime(self.sTrain)
        lastDT = pd.to_datetime(self.eTrain)

        # Collect one slice of self.dataTrain per window position.
        self.dfX = []
        self.dfY = []
        endDTList = []  # end datetime of every input window

        cursor = firstDT
        while True:
            inEnd = cursor + spanIn
            outEnd = inEnd + spanOut

            # Stop once the output window would run past the training period.
            if not (outEnd <= lastDT):
                break

            endDTList.append(inEnd)

            # Input window: spanIn starting at the cursor.
            self.dfX.append(self.dataTrain[cursor:inEnd])

            # Output window: spanOut starting where the input window ends.
            self.dfY.append(self.dataTrain[inEnd:outEnd])

            # Move the cursor forward by one stride.
            cursor = cursor + spanStride

        return (self.dfX, self.dfY, endDTList)
Beispiel #3
0
 def test_shift_months(self):
     """Check tslib.shift_months against element-wise DateOffset addition.

     Every combination of a year shift (-1, 0, 1) and a month shift
     (-2, 0, 2) is applied to a small index that includes month-end and
     leap-day timestamps.
     """
     stamps = DatetimeIndex([
         Timestamp('2000-01-05 00:15:00'),
         Timestamp('2000-01-31 00:23:00'),
         Timestamp('2000-01-01'),
         Timestamp('2000-02-29'),
         Timestamp('2000-12-31'),
     ])
     shift_pairs = [(y, m) for y in [-1, 0, 1] for m in [-2, 0, 2]]
     for years, months in shift_pairs:
         # shift_months takes the raw int64 values and a total month count.
         shifted = tslib.shift_months(stamps.asi8, years * 12 + months)
         actual = DatetimeIndex(shifted)

         expected = DatetimeIndex([
             stamp + offsets.DateOffset(years=years, months=months)
             for stamp in stamps
         ])
         tm.assert_index_equal(actual, expected)
Beispiel #4
0
def constrain_horizon(
    r,
    strict=False,
    cust=None,
    years=0,
    quarters=0,
    months=0,
    days=0,
    weeks=0,
    year=None,
    month=None,
    day=None,
):
    """Constrain a Series/DataFrame to a specified lookback period.

    The lookback start is computed relative to the last index label of `r`
    and the slice ``r[start:end]`` is returned.

    See the documentation for dateutil.relativedelta:
    dateutil.readthedocs.io/en/stable/relativedelta.html

    Parameters
    ----------
    r : DataFrame or Series
        The target pandas object to constrain
    strict : bool, default False
        If True, raise Error if the implied start date on the horizon predates
        the actual start date of `r`.  If False, just return `r` in this
        situation
    cust : str, default None
        Shorthand for the horizon; mutually exclusive with the other period
        parameters.  Accepted forms (case-insensitive):
        - digits followed by "y" or "m", e.g. "5y", "18m"
        - a spelled-out number followed by "year(s)" / "month(s)", optionally
          with a trailing " ago", e.g. "three years", "twenty-four months",
          "thirty six months ago".  Only the numbers listed in the internal
          lookup table are understood.
    years, months, weeks, days : int, default 0
        Relative information; specify as positive to subtract periods.  Adding
        or subtracting a relativedelta with relative information performs
        the corresponding arithmetic operation on the original datetime value
        with the information in the relativedelta
    quarters : int, default 0
        Similar to the other plural relative info periods above, but note that
        this param is custom here.  (It is not a standard relativedelta param)
        Each quarter is converted to three months.
    year, month, day : int, default None
        Absolute information.  These do not perform an arithmetic operation,
        but rather REPLACE the corresponding value in the original datetime.
        They are only used when the month-end convention below does not
        apply.

    Returns
    -------
    DataFrame or Series
        ``r[start:end]`` where `end` is the last index label of `r`.

    Raises
    ------
    ValueError
        If `cust` is combined with any other nonzero period parameter, if
        `cust` has an unrecognized form, or if `strict` is True and the
        computed start predates the first index label.
    KeyError
        If `cust` spells out a number that is not in the lookup table.
    """

    textnum = {
        "zero": 0,
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
        "eleven": 11,
        "twelve": 12,
        "thirteen": 13,
        "fourteen": 14,
        "fifteen": 15,
        "sixteen": 16,
        "seventeen": 17,
        "eighteen": 18,
        "nineteen": 19,
        "twenty": 20,
        "twenty four": 24,
        "thirty six": 36,
    }

    def _digits_to_int(text):
        """Return the first run of digits in `text` as an int."""
        found = re.search(r"\d+", text)
        if found is None:
            # e.g. cust="y": no number to read.
            raise ValueError("`cust` not recognized.")
        return int(found.group(0))

    def _words_to_int(words):
        """Translate a spelled-out number ("twenty-four") via `textnum`."""
        # Hyphens separate words, so turn them into spaces to match keys
        # such as "twenty four"; also collapse repeated whitespace.
        spaced = " ".join(words.replace("-", " ").split())
        if spaced in textnum:
            return textnum[spaced]
        # Fall back to the historical lookup (hyphens simply removed) so that
        # every spelling accepted before is still accepted; unknown words
        # raise KeyError as before.
        return textnum[words.replace("-", "")]

    relativedeltas = years, quarters, months, days, weeks, year, month, day
    if cust is not None and any(relativedeltas):
        raise ValueError("Cannot specify competing (nonzero) values for both"
                         " `cust` and other parameters.")
    if cust is not None:
        cust = cust.lower()

        if cust.endswith("y"):
            years = _digits_to_int(cust)

        elif cust.endswith("m"):
            months = _digits_to_int(cust)

        elif cust.endswith(("years ago", "year ago", "year", "years")):
            pos = cust.find(" year")
            years = _words_to_int(cust[:pos])

        elif cust.endswith(("months ago", "month ago", "month", "months")):
            pos = cust.find(" month")
            months = _words_to_int(cust[:pos])

        else:
            raise ValueError("`cust` not recognized.")

    # Convert quarters to months & combine for MonthOffset
    months += quarters * 3

    # Start date will be computed relative to `end`
    end = r.index[-1]

    # Establish some funky date conventions assumed in finance.  If the end
    # date is 6/30, the date *3 months prior* is 3/31, not 3/30 as would be
    # produced by dateutil.relativedelta.

    if end.is_month_end and days == 0 and weeks == 0:
        # Whole-month horizon ending on a month end: step back to the first
        # day of the earliest month so the label slice includes that month.
        months += years * 12
        start = end - offsets.MonthBegin(months)
    else:
        # `days - 1` keeps the window at exactly the requested length given
        # that label slicing includes both endpoints.
        start = end - offsets.DateOffset(
            years=years,
            months=months,
            days=days - 1,
            weeks=weeks,
            year=year,
            month=month,
            day=day,
        )
    if strict and start < r.index[0]:
        raise ValueError("`start` pre-dates first element of the Index, %s" %
                         r.index[0])
    return r[start:end]
# combine the datasets: values from `american` take precedence and gaps are
# filled from `columbia`
riverFlows = american.combine_first(columbia)

# periods aren't equal in the two datasets so find the overlap
# find the first month where the flow is missing for american
idx_american = riverFlows.index[riverFlows['american_flow'].isna()].min()

# find the last month where the flow is missing for columbia
idx_columbia = riverFlows.index[riverFlows['columbia_flow'].isna()].max()

# truncate the time series to the months strictly between the two gaps
riverFlows = riverFlows.truncate(
    before=idx_columbia + ofst.DateOffset(months=1),
    after=idx_american - ofst.DateOffset(months=1))

# write the truncated dataset to a file; the date index is kept as the first
# column (`to_csv` has no `ignore_index` argument, so it must not be passed)
riverFlows.to_csv(data_folder + 'combined_flow.csv')

# index is a DatetimeIndex
print('\nIndex of riverFlows')
print(riverFlows.index)

# selecting time series data with partial date strings
print('\ncsv_read[\'1933\':\'1934-06\']')
print(riverFlows['1933':'1934-06'])

# shifting the data
Beispiel #6
0
def test_dateoffset_misc():
    """Smoke-test DateOffset.freqstr and comparison against a plain int."""
    mixed_offset = offsets.DateOffset(months=2, days=4)
    # Only checks that reading the attribute does not raise.
    mixed_offset.freqstr

    two_months = offsets.DateOffset(months=2)
    assert not two_months == 2
Beispiel #7
0
    def update_daily_data(self,
                          stockslist=None,
                          date=None,
                          start_date=None,
                          end_date=None,
                          include_today=False):
        """Refresh the stored daily indicator tables and the tables derived
        from them.

        Steps, in order:

        1. For every indicator name in the update list (a different list is
           used when ``self.updatefreq == 'w'``), call
           ``self.update_ori_data`` and, if it reports new columns,
           post-process and persist the result with ``self.close_file``.
        2. Recompute and persist ``hfq_close`` (``close * adjfactor``) and
           call ``self.get_listday_matrix``.
        3. When ``self.updatefreq == 'M'``: update the next-month pct-change
           table, persist ``amt_per_deal`` and align month ends.
        4. When ``self.updatefreq == 'w'``: extend ``profit_ttm_G_d``
           (year-over-year change of ``profit_ttm_d``) and the
           ``pctchg_{1,3,6,12}M_d`` tables up to the most recent Thursday.

        Parameters
        ----------
        stockslist, date, start_date, end_date, include_today
            Passed through unchanged to ``self.update_ori_data``.
        """
        inds_to_update = ('pct_chg', 'close', 'adjfactor', 'maxupordown',
                          'trade_status', 'turn', 'amt', 'dealnum',
                          'mkt_cap_ard', 'mkt_cap_float_d')

        weekly_inds_to_update = ('close', 'adjfactor', 'maxupordown',
                                 'pct_chg', 'trade_status', 'turn',
                                 'dividendyield2_d', 'mkt_cap_float_d',
                                 'pb_lf_d', 'pcf_ncf_ttm_d', 'pcf_ocf_ttm_d',
                                 'pe_ttm_d', 'profit_ttm_d', 'ps_ttm_d',
                                 'sec_name1_d', 'val_pe_deducted_ttm_d',
                                 'industry_citic_d', 'industry_citic_level2_d')

        if self.updatefreq == 'w':
            inds_to_update = weekly_inds_to_update

        for qname in inds_to_update:
            new_cols, new_data = self.update_ori_data(qname, 'd', stockslist,
                                                      date, start_date,
                                                      end_date, include_today)
            if new_cols:
                new_date = sorted(new_cols)[-1]
                if qname == 'trade_status':
                    # Encode the status text as a flag: 1 for '交易'
                    # ("trading"), 0 for anything else.
                    new_data.loc[:, new_cols] = new_data.loc[:, new_cols].\
                                            applymap(lambda x: 0 if x != '交易' else 1)
                elif qname == 'pct_chg' or qname == 'turn':
                    # presumably converts percent values to fractions
                    new_data.loc[:, new_cols] = new_data.loc[:, new_cols] / 100
                self.close_file(new_data, qname)
                print("\"{}\" data updated to date {}.".format(
                    qname,
                    str(new_date)[:10]))
            else:
                print(f"\"{qname}\"'s data don't need to be updated.")

        close, adjfactor = self._align_element(self.close, self.adjfactor)
        hfq_close = close * adjfactor
        self.close_file(hfq_close, 'hfq_close')
        print("\'hfq_close\' updated.")

        self.get_listday_matrix()
        print("'listday matrix' updated.")

        if self.updatefreq == 'M':
            self._update_pct_chg_nm(hfq_close)

            amt, dealnum = self._align_element(self.amt, self.dealnum)
            amt_per_deal = amt / dealnum
            self.close_file(amt_per_deal, 'amt_per_deal')
            print("'amt_per_deal' updated")

            self._align_month_end_to_calendar()

        if self.updatefreq == 'w':
            datelist = hfq_close.columns.tolist()

            # Walk back one day at a time from now to the latest Thursday
            # (today, if today is a Thursday).
            lastThursday = toffsets.datetime.now()
            daydelta = toffsets.DateOffset(n=1)
            while lastThursday.weekday() != calendar.THURSDAY:
                lastThursday -= daydelta

            profit_ttm_G_d = self.profit_ttm_G_d
            # Columns after the last stored one, up to lastThursday; [1:]
            # drops the already-stored column itself.
            update_dates = hfq_close.loc[:, profit_ttm_G_d.
                                         columns[-1]:lastThursday].columns[1:]
            yoy = pd.DataFrame()
            # Loop variable deliberately not named `date`, so the `date`
            # parameter is not overwritten.
            for cur_date in update_dates:
                # Same calendar day one year earlier; clamp the day so that
                # 29 February maps to 28 February instead of raising.
                lstday = min(
                    cur_date.day,
                    calendar.monthrange(cur_date.year - 1,
                                        cur_date.month)[1])
                lstdate = toffsets.datetime(cur_date.year - 1, cur_date.month,
                                            lstday)
                lstdate = self._get_date(lstdate, 0, datelist)
                yoy[cur_date] = self.profit_ttm_d[
                    cur_date] / self.profit_ttm_d[lstdate] - 1
            profit_ttm_G_d = pd.concat([profit_ttm_G_d, yoy], axis=1)
            profit_ttm_G_d = profit_ttm_G_d[
                profit_ttm_G_d.columns.sort_values()]
            self.close_file(profit_ttm_G_d, 'profit_ttm_G_d')
            print("'profit_ttm_G_d' updated.")

            for offset in [1, 3, 6, 12]:
                pctchg_d = getattr(
                    self,
                    f'pctchg_{offset}M_d',
                )
                res = pd.DataFrame()

                update_dates = hfq_close.loc[:, pctchg_d.columns[-1]:
                                             lastThursday].columns[1:]
                for cur_date in update_dates:
                    # Step back `offset` months; every offset used here is
                    # at most 12, so one wrap into the previous year is
                    # enough (offset == 12 always wraps to the same month).
                    lstyear = cur_date.year
                    lstmonth = cur_date.month - offset
                    if lstmonth <= 0:
                        lstyear -= 1
                        lstmonth += 12
                    # Computed for every offset (including 12) and clamped to
                    # the length of the target month.
                    lstday = min(cur_date.day,
                                 calendar.monthrange(lstyear, lstmonth)[1])
                    lstdate = toffsets.datetime(lstyear, lstmonth, lstday)
                    lstdate = self._get_date(lstdate, 0, datelist)
                    res[cur_date] = hfq_close[cur_date] / hfq_close[
                        lstdate] - 1

                pctchg_d = pd.concat([pctchg_d, res], axis=1)
                pctchg_d = pctchg_d[pctchg_d.columns.sort_values()]
                self.close_file(pctchg_d, f'pctchg_{offset}M_d')
                print(f"'pctchg_{offset}M_d' updated.")
# One slice of `df` per season, each bounded by 9 October and 10 June of the
# following year.
s0304 = df.loc[slice('20031009', '20040610')]
s0405 = df.loc[slice('20041009', '20050610')]
s0506 = df.loc[slice('20051009', '20060610')]
s0607 = df.loc[slice('20061009', '20070610')]
s0708 = df.loc[slice('20071009', '20080610')]
s0809 = df.loc[slice('20081009', '20090610')]
s0910 = df.loc[slice('20091009', '20100610')]
s1011 = df.loc[slice('20101009', '20110610')]
s1112 = df.loc[slice('20111009', '20120610')]
s1213 = df.loc[slice('20121009', '20130610')]
#u, indices = np.unique(s1011.index.month, return_index=True)
#np.array([1,  24,  54, 85, 116, 144, 175, 205, 236]) - 1

# In[ ]:

# DateOffset(84) has no unit keyword, i.e. it is 84 calendar days.
_SEASON_SHIFT = offsets.DateOffset(84)


def _shifted_dayofyear(date_str):
    """Return the day of year of `date_str` after adding `_SEASON_SHIFT`."""
    return (pd.Timestamp(date_str) + _SEASON_SHIFT).dayofyear


# Per-season start ("sos") and end ("eos") dates, expressed as the day of
# year of the shifted date.
sos0304 = _shifted_dayofyear('2003-10-22')
eos0304 = _shifted_dayofyear('2004-05-26')

sos0405 = _shifted_dayofyear('2004-10-09')
eos0405 = _shifted_dayofyear('2005-05-28')

sos0506 = _shifted_dayofyear('2005-10-18')
eos0506 = _shifted_dayofyear('2006-05-30')

sos0607 = _shifted_dayofyear('2006-10-18')
eos0607 = _shifted_dayofyear('2007-05-30')

sos0708 = _shifted_dayofyear('2007-10-20')
eos0708 = _shifted_dayofyear('2008-06-02')

sos0809 = _shifted_dayofyear('2008-10-21')
                  index=pd.date_range('20170101', periods=100000, freq='T'))
# Display the time series built above.
ts

# In[34]:

# Label-based selection between two timestamps given as strings.
ts.loc['2017-09-19 21:00:59':'2017-09-19 21:30:00']

# ## 5. Time series arithmetic

# In[37]:

from pandas.tseries import offsets

dt = pd.Timestamp('2017-9-19 21:18:00')

# Plural keywords (months, days) are added to the timestamp; the singular
# keyword (hour) replaces that field of the result.
dt + offsets.DateOffset(months=1, days=2, hour=3)

# ## 6. Other methods

# ## 6.1 Shifting

# In[39]:

# 'min' is the minute frequency alias; the older single-letter alias 'T' is
# deprecated in recent pandas releases.
ts = pd.DataFrame(np.random.randn(7, 2),
                  columns=['Value1', 'Value2'],
                  index=pd.date_range('20170101', periods=7, freq='min'))
ts

# In[40]:

# Move the values down by three rows; the index itself is unchanged.
ts.shift(3)