Example #1
    def test_get_freq_code(self):
        # frequency str
        assert (frequencies.get_freq_code('A') == (frequencies.get_freq('A'),
                                                   1))
        assert (frequencies.get_freq_code('3D') == (frequencies.get_freq('D'),
                                                    3))
        assert (frequencies.get_freq_code('-2M') == (frequencies.get_freq('M'),
                                                     -2))

        # tuple
        assert (frequencies.get_freq_code(
            ('D', 1)) == (frequencies.get_freq('D'), 1))
        assert (frequencies.get_freq_code(
            ('A', 3)) == (frequencies.get_freq('A'), 3))
        assert (frequencies.get_freq_code(
            ('M', -2)) == (frequencies.get_freq('M'), -2))

        # numeric tuple
        assert frequencies.get_freq_code((1000, 1)) == (1000, 1)

        # offsets
        assert (frequencies.get_freq_code(
            offsets.Day()) == (frequencies.get_freq('D'), 1))
        assert (frequencies.get_freq_code(
            offsets.Day(3)) == (frequencies.get_freq('D'), 3))
        assert (frequencies.get_freq_code(
            offsets.Day(-2)) == (frequencies.get_freq('D'), -2))

        assert (frequencies.get_freq_code(
            offsets.MonthEnd()) == (frequencies.get_freq('M'), 1))
        assert (frequencies.get_freq_code(
            offsets.MonthEnd(3)) == (frequencies.get_freq('M'), 3))
        assert (frequencies.get_freq_code(
            offsets.MonthEnd(-2)) == (frequencies.get_freq('M'), -2))

        assert (frequencies.get_freq_code(
            offsets.Week()) == (frequencies.get_freq('W'), 1))
        assert (frequencies.get_freq_code(
            offsets.Week(3)) == (frequencies.get_freq('W'), 3))
        assert (frequencies.get_freq_code(
            offsets.Week(-2)) == (frequencies.get_freq('W'), -2))

        # Monday is weekday=0
        assert (frequencies.get_freq_code(
            offsets.Week(weekday=1)) == (frequencies.get_freq('W-TUE'), 1))
        assert (frequencies.get_freq_code(offsets.Week(
            3, weekday=0)) == (frequencies.get_freq('W-MON'), 3))
        assert (frequencies.get_freq_code(offsets.Week(
            -2, weekday=4)) == (frequencies.get_freq('W-FRI'), -2))
Example #2
    def test_get_freq_code(self):
        # freqstr
        self.assertEqual(frequencies.get_freq_code('A'),
                         (frequencies.get_freq('A'), 1))
        self.assertEqual(frequencies.get_freq_code('3D'),
                         (frequencies.get_freq('D'), 3))
        self.assertEqual(frequencies.get_freq_code('-2M'),
                         (frequencies.get_freq('M'), -2))

        # tuple
        self.assertEqual(frequencies.get_freq_code(('D', 1)),
                         (frequencies.get_freq('D'), 1))
        self.assertEqual(frequencies.get_freq_code(('A', 3)),
                         (frequencies.get_freq('A'), 3))
        self.assertEqual(frequencies.get_freq_code(('M', -2)),
                         (frequencies.get_freq('M'), -2))
        # numeric tuple
        self.assertEqual(frequencies.get_freq_code((1000, 1)), (1000, 1))

        # offsets
        self.assertEqual(frequencies.get_freq_code(offsets.Day()),
                         (frequencies.get_freq('D'), 1))
        self.assertEqual(frequencies.get_freq_code(offsets.Day(3)),
                         (frequencies.get_freq('D'), 3))
        self.assertEqual(frequencies.get_freq_code(offsets.Day(-2)),
                         (frequencies.get_freq('D'), -2))

        self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd()),
                         (frequencies.get_freq('M'), 1))
        self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(3)),
                         (frequencies.get_freq('M'), 3))
        self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(-2)),
                         (frequencies.get_freq('M'), -2))

        self.assertEqual(frequencies.get_freq_code(offsets.Week()),
                         (frequencies.get_freq('W'), 1))
        self.assertEqual(frequencies.get_freq_code(offsets.Week(3)),
                         (frequencies.get_freq('W'), 3))
        self.assertEqual(frequencies.get_freq_code(offsets.Week(-2)),
                         (frequencies.get_freq('W'), -2))

        # monday is weekday=0
        self.assertEqual(frequencies.get_freq_code(offsets.Week(weekday=1)),
                         (frequencies.get_freq('W-TUE'), 1))
        self.assertEqual(frequencies.get_freq_code(offsets.Week(3, weekday=0)),
                         (frequencies.get_freq('W-MON'), 3))
        self.assertEqual(
            frequencies.get_freq_code(offsets.Week(-2, weekday=4)),
            (frequencies.get_freq('W-FRI'), -2))
Example #3
def test_is_superperiod_subperiod():
    assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
    assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))

    assert (frequencies.is_superperiod(offsets.Hour(), offsets.Minute()))
    assert (frequencies.is_subperiod(offsets.Minute(), offsets.Hour()))

    assert (frequencies.is_superperiod(offsets.Second(), offsets.Milli()))
    assert (frequencies.is_subperiod(offsets.Milli(), offsets.Second()))

    assert (frequencies.is_superperiod(offsets.Milli(), offsets.Micro()))
    assert (frequencies.is_subperiod(offsets.Micro(), offsets.Milli()))

    assert (frequencies.is_superperiod(offsets.Micro(), offsets.Nano()))
    assert (frequencies.is_subperiod(offsets.Nano(), offsets.Micro()))
Example #4
def project_dates(arg_base, arg_count):
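    # Note: date(y, m, 1) + MonthEnd(k) is the k-th month end counting from
    # month m, so this builds `arg_count` consecutive month-end Timestamps.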
    result = [
        pd.to_datetime(
            datetime.date(arg_base.year, arg_base.month, 1) +
            offsets.MonthEnd(month)) for month in range(1, arg_count + 1)
    ]
    return result
Example #5
    def update_quarterly_data(self,
                              stockslist=None,
                              date=None,
                              start_date=None,
                              end_date=None):
        inds_to_update = ('assetstoequity', 'cashtocurrentdebt', 'current',
                          'longdebttodebt', 'grossprofitmargin_ttm2',
                          'longdebttoequity', 'qfa_deductedprofit', 'orps',
                          'eps_diluted2', 'qfa_grossprofitmargin',
                          'qfa_netprofitmargin', 'qfa_net_cash_flows_oper_act',
                          'qfa_net_profit_is', 'qfa_oper_rev', 'qfa_roa',
                          'qfa_roe', 'qfa_yoyocf', 'qfa_yoyprofit',
                          'qfa_yoysales', 'roa2_ttm2', 'roe_ttm2',
                          'stm_issuingdate', 'turnover_ttm', 'tot_equity',
                          'tot_liab', 'tot_assets',
                          'other_equity_instruments_PRE')

        curdate = toffsets.datetime.now().date() if date is None else date
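        # Months back from `curdate` to the most recently completed quarter
        # end (Mar/Jun/Sep/Dec): e.g. month 5 -> offset 2 -> ndate is Mar 31.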
        offset = curdate.month % 3 or 3
        ndate = curdate - toffsets.MonthEnd(n=offset)

        for qname in inds_to_update:
            new_cols, new_data = self.update_ori_data(qname, 'q', stockslist,
                                                      date, start_date,
                                                      end_date)
            if new_cols:
                self.close_file(new_data, qname)
                print("\"{}\" data updated to date {}.".format(
                    qname,
                    str(ndate)[:10]))
            else:
                print(f"\"{qname}\"'s data don't need to be updated.")
Example #6
    def get_date_ranges(self, start, end, scale='daily', include_bounds=True):
        '''
        Returns a list of dates sampled according to the specified parameters.

        Parameters
        ----------
        start: str
            First date that will be included.
        end: str
            Last date that will be included.
        scale: {'daily', 'weekly', 'monthly', 'quarterly', 'yearly'}
            Scale specifies the sampling intervals.
        include_bounds: bool
            Include start and end in the result if they are not already included.
        '''
        if scale not in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']:
            raise ValueError('Incorrect scale: %s' % scale)
        start = Timestamp(start)
        end = Timestamp(end)
        freq = dict(weekly='W', monthly='M', quarterly='3M', yearly='12M')
        offset = dict(weekly=off.Week(),
                      monthly=off.MonthEnd(),
                      quarterly=off.QuarterEnd(),
                      yearly=off.YearEnd())
        if scale == 'daily':
            ret = pd.date_range(start, end, freq='D')
        else:
            ret = pd.date_range(start + offset[scale], end, freq=freq[scale])
        ret = list(ret)
        if include_bounds:
            if start not in ret:
                ret = [start] + ret
            if end not in ret:
                ret = ret + [end]
        return ret
Example #7
def load_shiller():
    """Load market & macroeconomic data from Robert Shiller's website.

    Returns
    -------
    iedata : pd.DataFrame
        Time series of S&P 500 and interest rate variables.

    Example
    -------
    >>> from pyfinance import datasets
    >>> shiller = datasets.load_shiller()
    >>> shiller.iloc[:7, :5]
                sp50p  sp50d  sp50e      cpi  real_rate
    date
    1871-01-31   4.44   0.26    0.4  12.4641     5.3200
    1871-02-28   4.50   0.26    0.4  12.8446     5.3233
    1871-03-31   4.61   0.26    0.4  13.0350     5.3267
    1871-04-30   4.74   0.26    0.4  12.5592     5.3300
    1871-05-31   4.86   0.26    0.4  12.2738     5.3333
    1871-06-30   4.82   0.26    0.4  12.0835     5.3367
    1871-07-31   4.73   0.26    0.4  12.0835     5.3400

    .. _ONLINE DATA ROBERT SHILLER:
        http://www.econ.yale.edu/~shiller/data.htm
    """

    xls = 'http://www.econ.yale.edu/~shiller/data/ie_data.xls'
    cols = ['date', 'sp50p', 'sp50d', 'sp50e', 'cpi', 'frac', 'real_rate',
            'real_sp50p', 'real_sp50d', 'real_sp50e', 'cape']
    iedata = pd.read_excel(xls, sheet_name='Data', skiprows=7,
                           skipfooter=1, names=cols).drop('frac', axis=1)
    dt = iedata['date'].astype(str).str.replace('.', '', regex=False) + '01'
    iedata['date'] = pd.to_datetime(dt, format="%Y%m%d") + offsets.MonthEnd()
    return iedata.set_index('date')
Example #8
    def get_dates_range(self,
                        scale='auto',
                        start=None,
                        end=None,
                        date_max='2010-01-01'):
        '''
        Returns a list of dates sampled according to the specified parameters.

        :param scale: {'auto', 'maximum', 'daily', 'weekly', 'monthly',
            'quarterly', 'yearly'}
            Scale specifies the sampling intervals.
            'auto' will heuristically choose a scale for quick processing
        :param start: First date that will be included.
        :param end: Last date that will be included.
        '''
        if scale not in [
                'auto', 'maximum', 'daily', 'weekly', 'monthly', 'quarterly',
                'yearly'
        ]:
            raise ValueError('Incorrect scale: %s' % scale)
        start = Timestamp(start or self._start.min() or date_max)
        # NaT != NaT is True (as with NaN), so fall back when start is missing
        start = Timestamp(date_max) if pd.isna(start) else start
        end = Timestamp(end
                        or max(Timestamp(self._end.max()), self._start.max()))
        # Same NaT guard for the end bound
        end = datetime.utcnow() if pd.isna(end) else end
        start = start if self.check_in_bounds(start) else self._lbound
        end = end if self.check_in_bounds(end) else self._rbound

        if scale == 'auto':
            scale = self._auto_select_scale(start, end)
        if scale == 'maximum':
            start_dts = list(self._start.dropna().values)
            end_dts = list(self._end.dropna().values)
            dts = map(Timestamp, set(start_dts + end_dts))
            dts = filter(
                lambda ts: self.check_in_bounds(ts) and start <= ts <= end,
                dts)
            return dts

        freq = dict(daily='D',
                    weekly='W',
                    monthly='M',
                    quarterly='3M',
                    yearly='12M')
        offset = dict(daily=off.Day(n=0),
                      weekly=off.Week(),
                      monthly=off.MonthEnd(),
                      quarterly=off.QuarterEnd(),
                      yearly=off.YearEnd())
        # for some reason, weekly date range gives one week less:
        end_ = end + off.Week() if scale == 'weekly' else end
        ret = list(pd.date_range(start + offset[scale], end_,
                                 freq=freq[scale]))
        ret = [dt for dt in ret if dt <= end]
        ret = [start] + ret if ret and start < ret[0] else ret
        ret = ret + [end] if ret and end > ret[-1] else ret
        ret = filter(lambda ts: self.check_in_bounds(ts), ret)
        return ret
Example #9
def get_clusters(origin):
    year = int(str(origin)[:4])
    with engine.connect() as conn:
        sql = f"select 1 from clusters where date_part('year', date)={year}"
        res = conn.execute(sql).fetchone()
        if res is None: return None, None

        # offset = BusinessMonthEnd()
        offset = offsets.MonthEnd()
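        # rollforward is a no-op on a month-end date and otherwise snaps
        # forward to the end of origin's month.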
        end = offset.rollforward(origin.to_timestamp()).strftime('%Y-%m-%d')

        sql = f"""
        select ticker, date, mtd_1mf, vals from clusters
        where look_12m=TRUE
        and date between '1995-12-29' and '{end}'
        order by date asc -- beyond sequencing, ordering keeps the same clusters selected on every run
        """
        df = pd.read_sql(sql, conn, index_col=['date', 'ticker']).sort_index()

        # unpack vals from col of lists to their own cols
    X = list(df.vals.values)
    df = concat_x_cols(df.drop(columns='vals'), X)

    df['y', 'mtd_1mf'] = df.y.mtd_1mf
    return df
Example #10
def load_shiller(pickle_from=None, pickle_to=None):
    """Load data from Robert Shiller's website.

    Description: http://www.econ.yale.edu/~shiller/data.htm

    Examples
    ========
    shiller = load_shiller()
    shiller = shiller[shiller.index.month % 3 == 0]
    """

    link = 'http://www.econ.yale.edu/~shiller/data/ie_data.xls'
    iedata = (read_excel(link, sheet_name='Data', skiprows=range(
        0, 7)).loc[:, :'CAPE'].dropna(subset=['Date']).drop('Fraction',
                                                            axis=1))
    cols = [
        'date', 'sp50p', 'sp50d', 'sp50e', 'cpi', 'real_rate', 'real_sp50p',
        'real_sp50d', 'real_sp50e', 'cape'
    ]
    iedata.columns = cols
    iedata.loc[:, 'date'] = (pd.to_datetime(
        (iedata.date.astype(str).str.replace('.', '', regex=False) + '01'),
        format="%Y%m%d") + offsets.MonthEnd(1))
    iedata.set_index('date', inplace=True)

    return iedata
Example #11
    def update_monthly_data(self,
                            stockslist=None,
                            date=None,
                            start_date=None,
                            end_date=None):
        inds_to_update = ('sec_name1', 'industry_citic',
                          'industry_citic_level2', 'mkt_cap_float', 'pe_ttm',
                          'val_pe_deducted_ttm', 'ps_ttm', 'pb_lf',
                          'profit_ttm', 'pcf_ncf_ttm', 'pcf_ocf_ttm',
                          'dividendyield2', 'or_ttm', 'deductedprofit_ttm',
                          'ocfps_ttm', 'eps_ttm', 'holder_num',
                          'holder_avgpct', 'pct_chg_M')

        #        self.update_quarterly_data()
        #        self.qdata_to_mdata((start_date and end_date))
        #
        curdate = toffsets.datetime.now().date() if date is None else date
        ndate = curdate - toffsets.MonthEnd(n=1)
        for qname in inds_to_update:
            new_cols, new_data = self.update_ori_data(qname, 'M', stockslist,
                                                      date, start_date,
                                                      end_date)
            if new_cols:
                if len(new_cols) == 1:
                    fill_cols = new_data.columns[-2:]
                    new_data.loc[:, fill_cols] = new_data.loc[:, fill_cols].\
                                                 fillna(axis=1, method='ffill')
                self.close_file(new_data, qname)
                print("\"{}\" data updated to date {}.".format(
                    qname,
                    str(ndate)[:10]))
            else:
                print(f"\"{qname}\"'s data don't need to be updated.")

        # profit_ttm_G
        profit_ttm_G = self.profit_ttm.T / self.profit_ttm.T.shift(12) - 1
        profit_ttm_G = profit_ttm_G.T.dropna(how='all', axis=1)
        self.close_file(profit_ttm_G, "profit_ttm_G")
        print("'profit_ttm_G' updated.")

        # holder_avgpctchg
        holder_avgpct_cal = 1000 / self.holder_num
        holder_avgpct_cal, holder_avgpct_get = self._align_element(
            holder_avgpct_cal, self.holder_avgpct)
        orival, fillval = holder_avgpct_get.values, holder_avgpct_cal.values

        newval = np.where(np.isnan(orival), fillval, orival)
        holder_avgpct_fill = pd.DataFrame(newval,
                                          index=holder_avgpct_get.index,
                                          columns=holder_avgpct_get.columns)
        self.close_file(holder_avgpct_fill, "holder_avgpct_fill")

        h_fill = holder_avgpct_fill.T
        holder_avgpctchg = h_fill / h_fill.shift(12) - 1
        holder_avgpctchg = holder_avgpctchg.T.dropna(how='all', axis=1)
        self.close_file(holder_avgpctchg, "holder_avgpctchg")
        print("'holder_avgpct' updated.")
Example #12
import numpy as np
import pandas as pd
from datetime import datetime
from pandas.tseries import offsets as offset


def data_move_test():
  s = pd.Series(np.random.randn(6), index=pd.date_range('1/1/2019', periods=6, freq='M'))
  print('Original data \r\n', s)
  # Plain shifts (the data moves, producing missing values)
  print('Data shifted later \r\n', s.shift(2))
  print('Data shifted earlier \r\n', s.shift(-2))
  print('Shift later with the freq parameter: the timestamps move by the given frequency instead of the data \r\n', s.shift(2, freq='M'))
  print('Shift earlier with the freq parameter\r\n', s.shift(-2, freq='D'))

  now = datetime.today()
  print('datetime today:\r\n', now)
  print('datetime offset by 3 days\r\n', now + 3 * offset.Day())
  print('datetime offset to the end of this month\r\n', now + offset.MonthEnd())
  print('datetime offset to the month end 2 months out\r\n', now + offset.MonthEnd(2))

  print('rollforward to the end of the current month \r\n', offset.MonthEnd().rollforward(now))
  print('rollback to the end of the previous month\r\n', offset.MonthEnd().rollback(now))
  print('Series timestamps rolled forward to month end\r\n', s.groupby(offset.MonthEnd().rollforward).count())
Example #13
def test_is_superperiod_subperiod():

    # input validation
    assert not (frequencies.is_superperiod(offsets.YearEnd(), None))
    assert not (frequencies.is_subperiod(offsets.MonthEnd(), None))
    assert not (frequencies.is_superperiod(None, offsets.YearEnd()))
    assert not (frequencies.is_subperiod(None, offsets.MonthEnd()))
    assert not (frequencies.is_superperiod(None, None))
    assert not (frequencies.is_subperiod(None, None))

    assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
    assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))

    assert (frequencies.is_superperiod(offsets.Hour(), offsets.Minute()))
    assert (frequencies.is_subperiod(offsets.Minute(), offsets.Hour()))

    assert (frequencies.is_superperiod(offsets.Second(), offsets.Milli()))
    assert (frequencies.is_subperiod(offsets.Milli(), offsets.Second()))

    assert (frequencies.is_superperiod(offsets.Milli(), offsets.Micro()))
    assert (frequencies.is_subperiod(offsets.Micro(), offsets.Milli()))

    assert (frequencies.is_superperiod(offsets.Micro(), offsets.Nano()))
    assert (frequencies.is_subperiod(offsets.Nano(), offsets.Micro()))
Example #14
def regress_by_store(df):
    ret_list = []
    month_ends = pd.date_range(start='01/01/2016', end='05/01/2017', freq='M')
    for month_end in month_ends:
        quarter_start = month_end - offsets.MonthBegin(3)
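        # MonthBegin(3) back from a month-end date rolls to the first day of
        # the trailing 3-month window (e.g. Jan 31 -> Nov 1).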
        quarter_df = take_df_by_valid_period(df, quarter_start, month_end)
        if quarter_df.empty:
            continue
        next_month_start = month_end + offsets.MonthBegin(1)
        next_month_end = month_end + offsets.MonthEnd(1)
        next_month_df = take_df_by_period(df, next_month_start, next_month_end)
        if next_month_df.empty:
            continue
        quarter_y_pred = do_regression(quarter_df, next_month_df)

        year_start = month_end - offsets.MonthBegin(12)
        year_df = take_df_by_valid_period(df, year_start, month_end)
        year_y_pred = do_regression(year_df, next_month_df)

        temp_df = pd.DataFrame(index=next_month_df.index)
        temp_df["quarter_regress_no_dow"] = quarter_y_pred
        temp_df["year_regress_no_dow"] = year_y_pred
        ret_list.append(temp_df)
    return ret_list
Example #15
def load_rf(
    freq='M',
    pickle_from=None,
    pickle_to=None,
):
    """Build a risk-free rate return series using 3-month US T-bill yields.

    The 3-Month Treasury Bill: Secondary Market Rate from the Federal Reserve
    (a yield) is converted to a total return.  See 'Methodology' for details.

    The time series should closely mimic returns of the BofA Merrill Lynch US
    Treasury Bill (3M) (Local Total Return) index.

    Parameters
    ==========
    freq : str, sequence, or set
        If a single-character string, return a single-column DataFrame with
        index frequency corresponding to `freq`.  If a sequence or set, return
        a dict of DataFrames with the keys corresponding to `freq`(s).

    Methodology
    ===========
    The Federal Reserve publishes a daily chart of Selected Interest Rates
    (release H.15; www.federalreserve.gov/releases/h15/).  As with a yield
    curve, some yields are interpolated from recent issues because Treasury
    auctions do not occur daily.

    While the de-annualized ex-ante yield itself is a fairly good tracker of
    the day's total return, it is not perfect and can exhibit non-negligible
    error in periods of volatile short rates.  The purpose of this function
    is to convert yields to total returns for 3-month T-bills.  It is a
    straightforward process given that these are discount (zero-coupon)
    securities.  It consists of buying a 3-month bond at the beginning of each
    month, then amortizing that bond throughout the month to back into the
    price of a <3-month tenor bond.

    The source data (pulled from fred.stlouisfed.org) is quoted on a discount
    basis.  (See footnote 4 from release H.15.)  This is converted to a
    bond-equivalent yield (BEY) and then translated to a hypothetical daily
    total return.

    The process largely follows Morningstar's published Return Calculation of
    U.S. Treasury Constant Maturity Indices, and is as follows:
    - At the beginning of each month a bill is purchased at the prior month-end
      price, and daily returns in the month reflect the change in daily
      valuation of this bill
    - If t is not a business day, its yield is the yield of the prior
      business day.
    - At each day during the month, the price of a 3-month bill purchased on
      the final calendar day of the previous month is computed.
    - Month-end pricing is unique.  At each month-end date, there are
      effectively two bonds and two prices.  The first is the bond
      hypothetically purchased on the final day of the prior month with 2m
      remaining to maturity, and the second is a new-issue bond purchased that
      day with 3m to maturity.  The former is used as the numerator to compute
      that day's total return, while the latter is used as the denominator
      to compute the next day's (1st day of next month) total return.

    Description of the BofA Merrill Lynch US 3-Month Treasury Bill Index:
    The BofA Merrill Lynch US 3-Month Treasury Bill Index is comprised of a
    single issue purchased at the beginning of the month and held for a full
    month. At the end of the month that issue is sold and rolled into a newly
    selected issue. The issue selected at each month-end rebalancing is the
    outstanding Treasury Bill that matures closest to, but not beyond, three
    months from the rebalancing date. To qualify for selection, an issue must
    have settled on or before the month-end rebalancing date.
        (Source: Bank of America Merrill Lynch)

    See also
    ========
    FRED: 3-Month Treasury Bill: Secondary Market Rate (DTB3)
      https://fred.stlouisfed.org/series/DTB3
    McGraw-Hill/Irwin, Interest Rates, 2008.
      https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    Morningstar, Return Calculation of U.S. Treasury Constant Maturity Indices,
      September 2008.
    """

    # Validate `freq` param
    freqs = list('DWMQA')
    freq = freq.upper() if freq.islower() else freq
    if freq not in freqs:
        raise ValueError('`freq` must be either a single element or subset'
                         ' from %s, case-insensitive' % freqs)

    # Load daily 3-Month Treasury Bill: Secondary Market Rate
    # Note that this is on discount basis and will be converted to BEY
    # Periodicity is daily
    rates = dr('DTB3', 'fred', DSTART) * 0.01
    rates = (rates.asfreq('D',
                          method='ffill').fillna(method='ffill').squeeze())

    # Algebra doesn't 'work' on DateOffsets, don't simplify here!
    trigger = rates.index.is_month_end
    dtm_old = rates.index + offsets.MonthEnd(-1) + offsets.MonthEnd(3) \
            - rates.index
    dtm_new = rates.index.where(trigger, rates.index +
                                offsets.MonthEnd(-1)) \
            + offsets.MonthEnd(3) - rates.index

    # This does 2 things in one step:
    # (1) convert discount yield to BEY
    # (2) get the price at that BEY and days to maturity
    # The two equations are simplified
    # See https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
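    # A worked instance with hypothetical numbers: a 4% discount rate and 60
    # days to maturity give p = (100 / 360) * (360 - 0.04 * 60) = 99.33,
    # i.e. p = 100 * (1 - rate * days / 360).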
    p_old = (100 / 360) * (360 - rates * dtm_old.days)
    p_new = (100 / 360) * (360 - rates * dtm_new.days)

    res = p_old.pct_change().where(trigger, p_new.pct_change())
    res = returns.prep(res, in_format='dec', name='RF', freq='D')

    if freq != 'D':
        res = returns.prep(dr.rollup(out_freq=freq),
                           in_format='dec',
                           freq=freq)

    return res
Example #16
    def test_pickle_freq(self):
        # GH#2891
        prng = period_range("1/1/2011", "1/1/2012", freq="M")
        new_prng = tm.round_trip_pickle(prng)
        assert new_prng.freq == offsets.MonthEnd()
        assert new_prng.freqstr == "M"
Example #17
lst = fc_table.columns.tolist()


# ## Convert year-month Timestamps to month-end dates

# In[ ]:


import pandas.tseries.offsets as offsets


# In[ ]:


pd.to_datetime(bs_table['決算期'], format='%Y.%m') + offsets.MonthEnd()
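# format='%Y.%m' parses each value to the 1st of the month; adding MonthEnd()
# then rolls each date forward to its own month end.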


# ## Dropping specific rows (or columns)

# In[ ]:


# Drop rows 3 and 4
df.drop([3,4])


# In[ ]:


# Drop column A
Example #18
                                             yearfirst=True).date())
            qr_table['発表日'] = qr_table.loc[
                qr_table['発表日'].str.match(r'\d\d/\d\d/\d\d'),
                '発表日'].apply(lambda x: parse(x, yearfirst=True).date())
            # Convert to pandas Timestamps
            qr_table['Q期首'] = pd.to_datetime(qr_table['Q期首'],
                                             format='%Y-%m-%d')
            qr_table['発表日'] = pd.to_datetime(qr_table['発表日'],
                                             format='%Y-%m-%d')

            # Add a fiscal-period column by referencing the full-year results
            # Uses the fiscal-period series kept under another name before the
            # full-year forecast values were dropped
            for start_idx, start in qr_table['Q期首'].items():
                for end in pl_end:
                    if start < pd.to_datetime(
                            end, format='%Y.%m') + offsets.MonthEnd():
                        qr_table.loc[start_idx, '決算期'] = end
                        break

            # Replace non-numeric strings ('-' etc.) in numeric columns with NaN
            num_col = ('Q売上高', 'Q営業益', 'Q経常益', 'Q最終益', 'Q1株益', 'Q売上営業損益率')
            for key in num_col:
                if qr_table[key].dtypes == object:
                    qr_table.loc[~qr_table[key].str.replace(r'\.|\-', "",
                                                            regex=True
                                                            ).str.isnumeric(),
                                 key] = np.nan  # having to chain .str twice here feels clumsy...
                    # qr_table.loc[qr_table[key].str.contains('-'), key] = np.nan  # this form only handles '-' and not other strings, so it felt unsafe

            # Type conversion
            # Bulk conversion via a dict comprehension
            qr_table = qr_table.astype({
Example #19
def test_is_superperiod_subperiod():
    assert (fmod.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
    assert (fmod.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))
Example #20
        ("-2M", (get_freq("M"), -2)),

        # Tuple.
        (("D", 1), (get_freq("D"), 1)),
        (("A", 3), (get_freq("A"), 3)),
        (("M", -2), (get_freq("M"), -2)),
        ((5, "T"), (FreqGroup.FR_MIN, 5)),

        # Numeric Tuple.
        ((1000, 1), (1000, 1)),

        # Offsets.
        (offsets.Day(), (get_freq("D"), 1)),
        (offsets.Day(3), (get_freq("D"), 3)),
        (offsets.Day(-2), (get_freq("D"), -2)),
        (offsets.MonthEnd(), (get_freq("M"), 1)),
        (offsets.MonthEnd(3), (get_freq("M"), 3)),
        (offsets.MonthEnd(-2), (get_freq("M"), -2)),
        (offsets.Week(), (get_freq("W"), 1)),
        (offsets.Week(3), (get_freq("W"), 3)),
        (offsets.Week(-2), (get_freq("W"), -2)),
        (offsets.Hour(), (FreqGroup.FR_HR, 1)),

        # Monday is weekday=0.
        (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)),
        (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)),
        (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)),
    ])
def test_get_freq_code(freq_input, expected):
    assert get_freq_code(freq_input) == expected
Example #21
        # Frequency string.
        ("A", (get_freq_code("A")[0], 1)),
        ("3D", (get_freq_code("D")[0], 3)),
        ("-2M", (get_freq_code("M")[0], -2)),
        # Tuple.
        (("D", 1), (get_freq_code("D")[0], 1)),
        (("A", 3), (get_freq_code("A")[0], 3)),
        (("M", -2), (get_freq_code("M")[0], -2)),
        ((5, "T"), (FreqGroup.FR_MIN, 5)),
        # Numeric Tuple.
        ((1000, 1), (1000, 1)),
        # Offsets.
        (offsets.Day(), (get_freq_code("D")[0], 1)),
        (offsets.Day(3), (get_freq_code("D")[0], 3)),
        (offsets.Day(-2), (get_freq_code("D")[0], -2)),
        (offsets.MonthEnd(), (get_freq_code("M")[0], 1)),
        (offsets.MonthEnd(3), (get_freq_code("M")[0], 3)),
        (offsets.MonthEnd(-2), (get_freq_code("M")[0], -2)),
        (offsets.Week(), (get_freq_code("W")[0], 1)),
        (offsets.Week(3), (get_freq_code("W")[0], 3)),
        (offsets.Week(-2), (get_freq_code("W")[0], -2)),
        (offsets.Hour(), (FreqGroup.FR_HR, 1)),
        # Monday is weekday=0.
        (offsets.Week(weekday=1), (get_freq_code("W-TUE")[0], 1)),
        (offsets.Week(3, weekday=0), (get_freq_code("W-MON")[0], 3)),
        (offsets.Week(-2, weekday=4), (get_freq_code("W-FRI")[0], -2)),
    ],
)
def test_get_freq_code(freq_input, expected):
    assert get_freq_code(freq_input) == expected
Example #22
def load_factors():
    """Load risk factor returns.

    Factors
    -------
    Symbol      Description                                            Source
    ------      ----------                                             ------
    MKT                                                                French
    SMB         Size (small minus big)                                 French
    HML         Value (high minus low)                                 French
    RMW         Profitability (robust minus weak)                      French
    CMA         Investment (conservative minus aggressive)             French
    UMD         Momentum (up minus down)                               French
    STR         Short-term reversal                                    French
    LTR         Long-term reversal                                     French
    BETA        Beta                                                   French
    ACC         Accruals                                               French
    VAR         Variance                                               French
    IVAR        Residual variance                                      French
    EP          Earnings-to-price                                      French
    CP          Cash flow-to-price                                     French
    DP          Dividend-to-price                                      French
    BAB         Betting against beta                                   AQR
    QMJ         Quality minus junk                                     AQR
    HMLD        Value (high minus low) [modified version]              AQR
    LIQ         Liquidity                                              Pastor
    BDLB        Bond lookback straddle                                 Hsieh
    FXLB        Currency lookback straddle                                Hsieh
    CMLB        Commodity lookback straddle                            Hsieh
    IRLB        Interest rate lookback straddle                        Hsieh
    STLB        Stock lookback straddle                                Hsieh
    PUT         CBOE S&P 500 PutWrite Index                            CBOE
    BXM         CBOE S&P 500 BuyWrite Index®                           CBOE
    RXM         CBOE S&P 500 Risk Reversal Index                       CBOE

    Source Directory
    ----------------
    Source      Link
    ------      ----
    French      http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html  # noqa
    Pastor      http://faculty.chicagobooth.edu/lubos.pastor/research/liq_data_1962_2016.txt  # noqa
    AQR         https://www.aqr.com/library/data-sets
    Hsieh       https://faculty.fuqua.duke.edu/~dah7/HFData.htm
    Fed         https://fred.stlouisfed.org/
    CBOE        http://www.cboe.com/products/strategy-benchmark-indexes
    """

    # TODO: factors eligible for addition
    #   VIIX, VIIZ, XIV, ZIV, CRP (AQR)
    #   http://www.cboe.com/micro/buywrite/monthendpricehistory.xls ends 2016
    #   could use:
    #   http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv

    # Warning: slow, kludgy data retrieval follows
    # ------------------------------------------------------------------------

    # `tgt` will become a list of DataFrames and eventually concatenated
    tgt = []

    # MKT, SMB, HML, RMW, CMA, RF, UMD, STR, LTR
    facs = [
        "F-F_Research_Data_5_Factors_2x3",
        "F-F_Momentum_Factor",
        "F-F_ST_Reversal_Factor",
        "F-F_LT_Reversal_Factor",
    ]

    for fac in facs:
        tgt.append(pdr.DataReader(fac, "famafrench", DSTART)[0])

    # BETA, ACC, VAR, IVAR require some manipulation to compute returns
    # in the dual-sort method of Fama-French
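    # Each 5x5 dataset is sorted on size and the characteristic; the series
    # below averages one characteristic quintile across the five size buckets
    # (columns 0, 5, 10, 15, 20) and subtracts the opposite quintile
    # (columns 4, 9, 14, 19, 24).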
    for i in ["BETA", "AC", "VAR", "RESVAR"]:
        ser = pdr.DataReader(
            "25_Portfolios_ME_" + i + "_5x5", "famafrench", DSTART
        )[0]
        ser = ser.iloc[:, [0, 5, 10, 15, 20]].mean(axis=1) - ser.iloc[
            :, [4, 9, 14, 19, 24]
        ].mean(axis=1)
        ser = ser.rename(i)
        tgt.append(ser)

    # E/P, CF/P, D/P (univariate sorts, quintile spreads)
    for i in ["E-P", "CF-P", "D-P"]:
        ser = pdr.DataReader(
            "Portfolios_Formed_on_" + i, "famafrench", DSTART
        )[0]
        ser = ser.loc[:, "Hi 20"] - ser.loc[:, "Lo 20"]
        ser = ser.rename(i)
        tgt.append(ser)

    tgt = [df.to_timestamp(how="end") for df in tgt]

    # BAB, QMJ, HMLD
    # TODO: performance is poor here, runtime is eaten up by these 3
    links = {
        "BAB": "http://bit.ly/2hWyaG8",
        "QMJ": "http://bit.ly/2hUBSgF",
        "HMLD": "http://bit.ly/2hdVb7G",
    }
    for key, value in links.items():
        ser = pd.read_excel(value, header=18, index_col=0)["USA"] * 100
        ser = ser.rename(key)
        tgt.append(ser)

    # Lookback straddles
    link = "http://faculty.fuqua.duke.edu/~dah7/DataLibrary/TF-Fac.xls"
    straddles = pd.read_excel(link, header=14, index_col=0)
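    # The source index is YYYYMM; appending "01" parses it as the first of
    # the month, and MonthEnd(1) then shifts each date to its month end.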
    straddles.index = pd.DatetimeIndex(
        straddles.index.astype(str) + "01"
    ) + offsets.MonthEnd(1)
    straddles = straddles * 100.0
    tgt.append(straddles)

    # LIQ
    link = "http://bit.ly/2pn2oBK"
    liq = pd.read_csv(
        link,
        skiprows=14,
        delim_whitespace=True,
        header=None,
        usecols=[0, 3],
        index_col=0,
        names=["date", "LIQ"],
    )
    liq.index = pd.DatetimeIndex(
        liq.index.astype(str) + "01"
    ) + offsets.MonthEnd(1)
    liq = liq.replace(-99, np.nan) * 100.0
    tgt.append(liq)

    # USD, HY
    fred = pdr.DataReader(["DTWEXB", "BAMLH0A0HYM2"], "fred", DSTART)
    fred = fred.asfreq("D", method="ffill").fillna(method="ffill").asfreq("M")
    fred.loc[:, "DTWEXB"] = fred["DTWEXB"].pct_change() * 100.0
    fred.loc[:, "BAMLH0A0HYM2"] = fred["BAMLH0A0HYM2"].diff()
    tgt.append(fred)

    # PUT, BXM, RXM (CBOE options strategy indices)
    link1 = "http://www.cboe.com/micro/put/put_86-06.xls"
    link2 = "http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv"  # noqa

    put1 = pd.read_excel(
        link1, index_col=0, skiprows=6, header=None
    ).rename_axis("DATE")
    put2 = pd.read_csv(
        link2, index_col=0, parse_dates=True, skiprows=7, header=None
    ).rename_axis("DATE")
    put = (
        pd.concat((put1, put2))
        .rename(columns={1: "PUT"})
        .iloc[:, 0]
        .asfreq("D", method="ffill")
        .fillna(method="ffill")
        .asfreq("M")
        .pct_change()
        * 100.0
    )
    tgt.append(put)

    link1 = "http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmarchive.csv"  # noqa
    link2 = "http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmcurrent.csv"  # noqa

    bxm1 = pd.read_csv(
        link1, index_col=0, parse_dates=True, skiprows=5, header=None
    ).rename_axis("DATE")
    bxm2 = pd.read_csv(
        link2, index_col=0, parse_dates=True, skiprows=4, header=None
    ).rename_axis("DATE")
    bxm = (
        pd.concat((bxm1, bxm2))
        .rename(columns={1: "BXM"})
        .iloc[:, 0]
        .asfreq("D", method="ffill")
        .fillna(method="ffill")
        .asfreq("M")
        .pct_change()
        * 100.0
    )
    tgt.append(bxm)

    link = "http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/rxm_historical.csv"  # noqa
    rxm = (
        pd.read_csv(
            link, index_col=0, parse_dates=True, skiprows=2, header=None
        )
        .rename(columns={1: "RXM"})
        .rename_axis("DATE")
        .iloc[:, 0]
        .asfreq("D", method="ffill")
        .fillna(method="ffill")
        .asfreq("M")
        .pct_change()
        * 100.0
    )
    tgt.append(rxm)

    # Clean up data retrieved above
    # -----------------------------------------------------------------

    factors = pd.concat(tgt, axis=1).round(2)
    newnames = {
        "Mkt-RF": "MKT",
        "Mom   ": "UMD",
        "ST_Rev": "STR",
        "LT_Rev": "LTR",
        "RESVAR": "IVAR",
        "AC": "ACC",
        "PTFSBD": "BDLB",
        "PTFSFX": "FXLB",
        "PTFSCOM": "CMLB",
        "PTFSIR": "IRLB",
        "PTFSSTK": "STLB",
        "DTWEXB": "USD",
        "BAMLH0A0HYM2": "HY",
    }
    factors.rename(columns=newnames, inplace=True)

    # Get last valid RF date; returns will be constrained to this date
    factors = factors[: factors["RF"].last_valid_index()]

    # Subtract RF for long-only factors
    subtract = ["HY", "PUT", "BXM", "RXM"]

    for i in subtract:
        factors.loc[:, i] = factors[i] - factors["RF"]

    return factors
Example #23
    def _get_month_end(self, date):
        # calendar.monthrange returns (weekday of the 1st, days in the month)
        _, days = calendar.monthrange(date.year, date.month)
        if date.day == days:
            return date
        else:
            return date + toffsets.MonthEnd(n=1)
Example #24
def make_date(arg_year, arg_month):
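    # MonthEnd(0) rolls the first of the month forward to that month's end;
    # n=0 leaves dates already on the anchor unchanged.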
    result = pd.to_datetime(
        datetime.date(arg_year, arg_month, 1) + offsets.MonthEnd(0))
    return result
Example #25
        ("W-FRI", 4005),
        ("Min", 8000),
        ("ms", 10000),
        ("US", 11000),
        ("NS", 12000),
    ],
)
def test_period_str_to_code(obj, expected):
    assert _period_str_to_code(obj) == expected


@pytest.mark.parametrize(
    "p1,p2,expected",
    [
        # Input validation.
        (offsets.MonthEnd(), None, False),
        (offsets.YearEnd(), None, False),
        (None, offsets.YearEnd(), False),
        (None, offsets.MonthEnd(), False),
        (None, None, False),
        (offsets.YearEnd(), offsets.MonthEnd(), True),
        (offsets.Hour(), offsets.Minute(), True),
        (offsets.Second(), offsets.Milli(), True),
        (offsets.Milli(), offsets.Micro(), True),
        (offsets.Micro(), offsets.Nano(), True),
    ],
)
def test_super_sub_symmetry(p1, p2, expected):
    assert is_superperiod(p1, p2) is expected
    assert is_subperiod(p2, p1) is expected
Example #26
def load_retaildata(pickle_from=None, pickle_to=None):
    """Monthly retail trade data from census.gov."""
    # full = 'https://www.census.gov/retail/mrts/www/mrtssales92-present.xls'
    # indiv = 'https://www.census.gov/retail/marts/www/timeseries.html'

    db = {
        'Auto, other Motor Vehicle':
        'https://www.census.gov/retail/marts/www/adv441x0.txt',
        'Building Material and Garden Equipment and Supplies Dealers':
        'https://www.census.gov/retail/marts/www/adv44400.txt',
        'Clothing and Clothing Accessories Stores':
        'https://www.census.gov/retail/marts/www/adv44800.txt',
        'Dept. Stores (ex. leased depts)':
        'https://www.census.gov/retail/marts/www/adv45210.txt',
        'Electronics and Appliance Stores':
        'https://www.census.gov/retail/marts/www/adv44300.txt',
        'Food Services and Drinking Places':
        'https://www.census.gov/retail/marts/www/adv72200.txt',
        'Food and Beverage Stores':
        'https://www.census.gov/retail/marts/www/adv44500.txt',
        'Furniture and Home Furnishings Stores':
        'https://www.census.gov/retail/marts/www/adv44200.txt',
        'Gasoline Stations':
        'https://www.census.gov/retail/marts/www/adv44700.txt',
        'General Merchandise Stores':
        'https://www.census.gov/retail/marts/www/adv45200.txt',
        'Grocery Stores':
        'https://www.census.gov/retail/marts/www/adv44510.txt',
        'Health and Personal Care Stores':
        'https://www.census.gov/retail/marts/www/adv44600.txt',
        'Miscellaneous Store Retailers':
        'https://www.census.gov/retail/marts/www/adv45300.txt',
        'Motor Vehicle and Parts Dealers':
        'https://www.census.gov/retail/marts/www/adv44100.txt',
        'Nonstore Retailers':
        'https://www.census.gov/retail/marts/www/adv45400.txt',
        'Retail and Food Services, total':
        'https://www.census.gov/retail/marts/www/adv44x72.txt',
        'Retail, total':
        'https://www.census.gov/retail/marts/www/adv44000.txt',
        'Sporting Goods, Hobby, Book, and Music Stores':
        'https://www.census.gov/retail/marts/www/adv45100.txt',
        'Total (excl. Motor Vehicle)':
        'https://www.census.gov/retail/marts/www/adv44y72.txt',
        'Retail (excl. Motor Vehicle and Parts Dealers)':
        'https://www.census.gov/retail/marts/www/adv4400a.txt'
    }

    dct = {}
    for key, value in db.items():
        data = read_csv(value,
                        skiprows=5,
                        skip_blank_lines=True,
                        header=None,
                        sep=r'\s+',
                        index_col=0)
        try:
            cut = data.index.get_loc('SEASONAL')
        except KeyError:
            cut = data.index.get_loc('NO')
        data = data.iloc[:cut]
        data = data.apply(lambda col: pd.to_numeric(col, downcast='float'))
        data = data.stack()
        year = data.index.get_level_values(0)
        month = data.index.get_level_values(1)
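        # Assemble first-of-month dates from the year/month index levels,
        # then shift each to its month end.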
        idx = pd.to_datetime({'year' : year, 'month' : month, 'day' : 1}) \
            + offsets.MonthEnd(1)
        data.index = idx
        data.name = key
        dct[key] = data

    sales = DataFrame(dct)
    sales = sales.reindex(
        pd.date_range(sales.index[0], sales.index[-1], freq='M'))
    # TODO: account for any skipped months; could specify a DateOffset to
    # `freq` param of `pandas.DataFrame.shift`
    yoy = sales.pct_change(periods=12)

    return sales, yoy
Example #27
def load_factors(pickle_from=None, pickle_to=None):
    """Load risk factor returns.

    Factors
    =======
    Symbol      Description                                            Source
    ------      ----------                                             ------
    MKT                                                                French
    SMB         Size (small minus big)                                 French
    HML         Value (high minus low)                                 French
    RMW         Profitability (robust minus weak)                      French
    CMA         Investment (conservative minus aggressive)             French
    UMD         Momentum (up minus down)                               French
    STR         Short-term reversal                                    French
    LTR         Long-term reversal                                     French
    BETA        Beta                                                   French
    ACC         Accruals                                               French
    VAR         Variance                                               French
    IVAR        Residual variance                                      French
    EP          Earnings-to-price                                      French
    CP          Cash flow-to-price                                     French
    DP          Dividend-to-price                                      French
    BAB         Betting against beta                                   AQR
    QMJ         Quality minus junk                                     AQR
    HMLD        Value (high minus low) [modified version]              AQR
    LIQ         Liquidity                                              Pastor
    BDLB        Bond lookback straddle                                 Hsieh
    FXLB        Currency lookback straddle                                Hsieh
    CMLB        Commodity lookback straddle                            Hsieh
    IRLB        Interest rate lookback straddle                        Hsieh
    STLB        Stock lookback straddle                                Hsieh
    PUT         CBOE S&P 500 PutWrite Index                            CBOE
    BXM         CBOE S&P 500 BuyWrite Index®                           CBOE
    RXM         CBOE S&P 500 Risk Reversal Index                       CBOE

    Source Directory
    ================
    Source      Link
    ------      ----
    French      http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    Pastor      http://faculty.chicagobooth.edu/lubos.pastor/research/liq_data_1962_2016.txt
    AQR         https://www.aqr.com/library/data-sets
    Hsieh       https://faculty.fuqua.duke.edu/~dah7/HFData.htm
    Fed         https://fred.stlouisfed.org/
    CBOE        http://www.cboe.com/products/strategy-benchmark-indexes
    """

    # TODO: factors eligible for addition
    #   VIIX, VIIZ, XIV, ZIV, CRP (AQR)
    #   http://www.cboe.com/micro/buywrite/monthendpricehistory.xls ends 2016
    #   could use:
    #   http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv

    # Warning: slow, kludgy data retrieval follows
    # ------------------------------------------------------------------------

    # `tgt` will become a list of DataFrames and eventually concatenated
    tgt = []

    # MKT, SMB, HML, RMW, CMA, RF, UMD, STR, LTR
    facs = [
        'F-F_Research_Data_5_Factors_2x3', 'F-F_Momentum_Factor',
        'F-F_ST_Reversal_Factor', 'F-F_LT_Reversal_Factor'
    ]

    for fac in facs:
        tgt.append(dr(fac, 'famafrench', DSTART)[0])

    # BETA, ACC, VAR, IVAR require some manipulation to compute returns
    # in the dual-sort method of Fama-French
    for i in ['BETA', 'AC', 'VAR', 'RESVAR']:
        ser = dr('25_Portfolios_ME_' + i + '_5x5', 'famafrench', DSTART)[0]
        ser = (ser.iloc[:, [0, 5, 10, 15, 20]].mean(axis=1) -
               ser.iloc[:, [4, 9, 14, 19, 24]].mean(axis=1))
        ser = ser.rename(i)
        tgt.append(ser)

    # E/P, CF/P, D/P (univariate sorts, quintile spreads)
    for i in ['E-P', 'CF-P', 'D-P']:
        ser = dr('Portfolios_Formed_on_' + i, 'famafrench', DSTART)[0]
        ser = ser.loc[:, 'Hi 20'] - ser.loc[:, 'Lo 20']
        ser = ser.rename(i)
        tgt.append(ser)

    tgt = [df.to_timestamp(how='end') for df in tgt]

    # BAB, QMJ, HMLD
    # TODO: performance is poor here, runtime is eaten up by these 3
    links = {
        'BAB': 'http://bit.ly/2hWyaG8',
        'QMJ': 'http://bit.ly/2hUBSgF',
        'HMLD': 'http://bit.ly/2hdVb7G'
    }
    for key, value in links.items():
        ser = read_excel(value, header=18, index_col=0)['USA'] * 100
        ser = ser.rename(key)
        tgt.append(ser)

    # Lookback straddles
    link = 'http://faculty.fuqua.duke.edu/~dah7/DataLibrary/TF-Fac.xls'
    straddles = read_excel(link, header=14, index_col=0)
    straddles.index = (pd.DatetimeIndex(straddles.index.astype(str) + '01') +
                       offsets.MonthEnd(1))
    straddles = straddles * 100.
    tgt.append(straddles)

    # LIQ
    link = 'http://bit.ly/2pn2oBK'
    liq = read_csv(link,
                   skiprows=14,
                   delim_whitespace=True,
                   header=None,
                   usecols=[0, 3],
                   index_col=0,
                   names=['date', 'LIQ'])
    liq.index = (pd.DatetimeIndex(liq.index.astype(str) + '01') +
                 offsets.MonthEnd(1))
    liq = liq.replace(-99, np.nan) * 100.
    tgt.append(liq)

    # USD, HY
    fred = dr(['DTWEXB', 'BAMLH0A0HYM2'], 'fred', DSTART)  # daily default
    fred = (fred.asfreq('D',
                        method='ffill').fillna(method='ffill').asfreq('M'))
    fred.loc[:, 'DTWEXB'] = fred['DTWEXB'].pct_change() * 100.
    fred.loc[:, 'BAMLH0A0HYM2'] = fred['BAMLH0A0HYM2'].diff()
    tgt.append(fred)

    # PUT, BXM, RXM (CBOE options strategy indices)
    link1 = 'http://www.cboe.com/micro/put/put_86-06.xls'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv'

    put1 = (read_excel(link1, index_col=0, skiprows=6,
                       header=None).rename_axis('DATE'))
    put2 = read_csv(link2,
                    index_col=0,
                    parse_dates=True,
                    skiprows=7,
                    header=None).rename_axis('DATE')
    put = (pd.concat((put1, put2)).rename(columns={
        1: 'PUT'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change() *
           100.)
    tgt.append(put)

    link1 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmarchive.csv'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmcurrent.csv'

    bxm1 = read_csv(link1,
                    index_col=0,
                    parse_dates=True,
                    skiprows=5,
                    header=None).rename_axis('DATE')
    bxm2 = read_csv(link2,
                    index_col=0,
                    parse_dates=True,
                    skiprows=4,
                    header=None).rename_axis('DATE')
    bxm = (pd.concat((bxm1, bxm2)).rename(columns={
        1: 'BXM'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change() *
           100.)
    tgt.append(bxm)

    link = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/rxm_historical.csv'
    rxm = (read_csv(
        link, index_col=0, parse_dates=True, skiprows=2,
        header=None).rename(columns={
            1: 'RXM'
        }).rename_axis('DATE').iloc[:, 0].asfreq('D', method='ffill').fillna(
            method='ffill').asfreq('M').pct_change() * 100.)
    tgt.append(rxm)

    # Clean up data retrieved above
    # ------------------------------------------------------------------------

    factors = pd.concat(tgt, axis=1).round(2)
    newnames = {
        'Mkt-RF': 'MKT',
        'Mom   ': 'UMD',
        'ST_Rev': 'STR',
        'LT_Rev': 'LTR',
        'RESVAR': 'IVAR',
        'AC': 'ACC',
        'PTFSBD': 'BDLB',
        'PTFSFX': 'FXLB',
        'PTFSCOM': 'CMLB',
        'PTFSIR': 'IRLB',
        'PTFSSTK': 'STLB',
        'DTWEXB': 'USD',
        'BAMLH0A0HYM2': 'HY'
    }
    factors.rename(columns=newnames, inplace=True)

    # Get last valid RF date; returns will be constrained to this date
    factors = factors[:factors['RF'].last_valid_index()]

    # Subtract RF for long-only factors
    subtract = ['HY', 'PUT', 'BXM', 'RXM']

    for i in subtract:
        factors.loc[:, i] = factors[i] - factors['RF']

    return factors
Example #28
import pandas as pd
import pandas.tseries.offsets as offsets

week_ends = pd.date_range(start='01/02/2017', end='05/01/2017', freq='W')
print(week_ends)
print(week_ends[16] + offsets.Week(1))

exit(0)


month_ends = pd.date_range(start='01/01/2016', end='05/01/2017', freq='M')
print(month_ends[15].replace(day=22))
print(month_ends[15])
if month_ends[15].month == 4 and month_ends[15].year == 2017:
    month_ends[15].replace(day=22)
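    # Timestamp.replace returns a new Timestamp; DatetimeIndex entries are
    # immutable, so month_ends[15] is unchanged by the line above.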
    print(month_ends[15])
print(month_ends)
exit(0)
for month_end in month_ends:
    quarter_start = month_end - offsets.MonthBegin(3)
    next_month_start = month_end + offsets.MonthBegin(1)
    next_month_end = month_end + offsets.MonthEnd(1)
    year_start = month_end - offsets.MonthBegin(12)

    print("-"*30)
    print(quarter_start)
    print(next_month_start)
    print(next_month_end)
    print(year_start)