Exemple #1
0
def get_bm():
    """Compute book-to-market (bm) and log(bm) and save both to CSV.

    Book equity (bps) is taken only at fiscal year end (December) and
    divided by market equity.  The ratio is shifted forward six months
    (so accounting data are public before they are used), re-indexed on
    a monthly grid, and forward-filled for up to 11 months.

    Note: a handy bookmarked example of date-index manipulation in
    pandas.  be and me here are per-share values, unlike the book which
    uses all floating shares; the ratio bm is unaffected.
    """
    book = read_df('bps', 'M')
    book = book[book.index.month == 12]  # keep December (fiscal-year-end) rows
    market = read_df('stockCloseY', 'M')
    book, market = get_inter_frame([book, market])

    ratio = book / market
    ratio = ratio.where(ratio > 0)      # drop samples with bm <= 0
    ratio = ratio.shift(1, freq='6M')   # 6-month publication lag

    monthly = pd.date_range(ratio.index[0], ratio.index[-1], freq='M')
    ratio = ratio.reindex(index=monthly)
    ratio = ratio.fillna(method='ffill', limit=11)
    ratio.to_csv(os.path.join(DATA_PATH, 'bm.csv'))

    np.log(ratio).to_csv(os.path.join(DATA_PATH, 'logbm.csv'))
Exemple #2
0
def get_eretD():
    """Write daily excess returns (stock return minus risk-free rate) to eretD.csv."""
    ret = read_df('stockRetD', 'D')
    rf = read_df('rfD', 'D')
    # stockRetD is on business days while rfD is on calendar days; dropping
    # all-NaN rows restores the business-day index after the subtraction.
    excess = ret.sub(rf['rfD'], axis=0).dropna(axis=0, how='all')
    excess.to_csv(os.path.join(DATA_PATH, 'eretD.csv'))
Exemple #3
0
 def unify_value(self):
     """Stack bm and logbm into one long-format frame indexed by (t, sid)."""
     names = ['bm', 'logbm']
     stacked = []
     for name in names:
         ser = read_df(name, 'M').stack()  # wide (t x sid) -> long series
         ser.index.names = ['t', 'sid']
         stacked.append(ser)
     return pd.concat(stacked, axis=1, keys=names)
Exemple #4
0
def compare_wind_gta_bps():
    """Align bps from the wind and gta sources for comparison.

    NOTE: the two sources disagree substantially!

    :return:
    """
    wind = read_df('bps_wind', 'M')
    gta = read_df('bps', 'M')
    # wind columns appear to carry a 3-char exchange suffix and leading
    # zeros; normalize them to plain integer strings to match gta's sids.
    wind.columns = [str(int(c[:-3])) for c in wind.columns]
    wind = wind.sort_index(axis=1)
    gta = gta.sort_index(axis=1)
    wind, gta = get_inter_frame([wind, gta])
Exemple #5
0
 def unify_size(self):
     """Stack the three size measures into one long (t, sid)-style frame."""
     measures = []
     for name in ['size', 'mktCap_ff', 'size_ff']:
         # each file is wide: index t, columns sid
         ser = read_df(name, 'M').stack()
         ser.name = name
         measures.append(ser)
     return pd.concat(measures, axis=1)
Exemple #6
0
def get_momentum():
    """Build momentum signals and save them (in percent) to momentum.csv.

    Lagged specs (d_lag) skip the most recent month: the window includes
    time t but the return at t itself is not used, hence window 12 rather
    than 11 for an 11-month lookback.  Non-lagged specs use returns up to
    and including t.
    """
    returns = read_df('stockRetM', 'M').stack()
    returns.index.names = ['t', 'sid']

    # name -> (window, min_periods); lagged specs exclude the month-t return
    lagged = {'mom': (12, 9), 'r12': (13, 10), 'r6': (7, 5)}
    unlagged = {'R12M': (12, 10), 'R9M': (9, 7), 'R6M': (6, 5), 'R3M': (3, 3)}

    pieces, labels = [], []
    for name, (window, minp) in lagged.items():
        pieces.append(returns.groupby('sid').apply(
            lambda s, w=window, m=minp: _before(s, w, m)))
        labels.append(name)
    for name, (window, minp) in unlagged.items():
        pieces.append(returns.groupby('sid').apply(
            lambda s, w=window, m=minp: _upto(s, w, m)))
        labels.append(name)

    momentum = pd.concat(pieces, axis=1, keys=labels) * 100  # to percent
    momentum.to_csv(os.path.join(DATA_PATH, 'momentum.csv'))
Exemple #7
0
def get_rev():
    """Save monthly returns (in percent) as the short-term reversal signal."""
    reversal = (read_df('stockRetM', 'M') * 100).stack().to_frame()
    reversal.columns = ['reversal']
    reversal.index.names = ['t', 'sid']
    reversal.to_csv(os.path.join(DATA_PATH, 'reversal.csv'))
Exemple #8
0
 def unify_eretM(self):
     """Return monthly excess returns as a long frame indexed by (t, sid)."""
     stacked = read_df('eretM', 'M').stack().to_frame()
     stacked.columns = ['eretM']
     stacked.index.names = ['t', 'sid']
     return stacked
Exemple #9
0
 def unify_size(self):
     """Combine the four size measures into one (t, sid)-indexed frame."""
     # file name -> output column name (capM is reported as mktCap)
     spec = [('capM', 'mktCap'), ('size', 'size'),
             ('mktCap_ff', 'mktCap_ff'), ('size_ff', 'size_ff')]
     series = []
     for fname, colname in spec:
         # each file is wide: index t, columns sid
         ser = read_df(fname, 'M').stack()
         ser.name = colname
         series.append(ser)
     comb = pd.concat(series, axis=1)
     comb.index.names = ['t', 'sid']
     return comb
Exemple #10
0
def cal_market_states():
    """Placeholder for the market-state computation (not implemented yet).

    market states: search for 'market state' in zoter
    1. Cheema and Nartea, "Momentum Returns, Market States, and Market
       Dynamics.", chapter 3.1: Following Chui et al. (2010), we set
       stocks with monthly returns greater (lower) than 100 (-95) percent
       equal to 100 (-95) percent to avoid the influence of extreme
       returns and any possible data recording errors.

    :return:
    """
    upDown = read_df('upDown', 'M')  # loaded but not yet used
Exemple #11
0
    def _combine_all_data(self):
        """Assemble one long (t, sid)-indexed panel of eret plus all indicators.

        All indicators are shifted forward one month, so at index t+1 a row
        holds indicator values from time t together with the excess return
        realised at t+1: portfolios are sorted on time-t indicators and
        evaluated on time-(t+1) returns.  Shifting the indicators forward
        (rather than shifting eret back) keeps eret aligned with the month
        it was actually earned, which the regression of portfolio eret on
        mktRetM in _alpha (template.py) relies on.
        """
        ret = read_df('stockRetM', freq='M')
        rf = read_df('rfM', freq='M')
        eret = ret.sub(rf['rf'], axis=0).stack()
        eret.name = 'eret'
        #TODO: create a df to store eret

        indicators = [ind for values in self.information.values()
                      for ind in values]

        frames = [eret]
        for ind in indicators:
            frames.append(read_df(ind, 'M').shift(1).stack())
        data = pd.concat(frames, axis=1, keys=['eret'] + indicators)
        data.index.names = ['t', 'sid']

        # join the single-indexed market return onto the MultiIndex panel
        mktRetM = read_df('mktRetM', freq='M')
        mktRetM.index.name = 't'
        data = data.join(mktRetM)

        # truncate the sample to 1996 onward
        return data[data.index.get_level_values('t').year >= 1996]
Exemple #12
0
    def unify_liquidity(self):
        """Combine the illiquidity measures with the liquidity beta.

        Returns a (t, sid)-indexed frame with one column per illiquidity
        type plus 'liqBeta'.

        Bug fix: the original chained ``.head()`` after the unstack and
        after the concat, truncating the result to 5 rows — leftover
        debugging calls, removed here.
        """
        illiq = pd.read_csv(os.path.join(DATA_PATH, 'illiq.csv'),
                            index_col=[0, 1],
                            parse_dates=True)
        illiq = illiq.stack().unstack('type')
        illiq.index.names = ['t', 'sid']

        liqBeta = read_df('liqBeta', 'M')
        liqBeta = liqBeta.stack()
        liqBeta.index.names = ['t', 'sid']
        liqBeta.name = 'liqBeta'

        comb = pd.concat([illiq, liqBeta], axis=1)
        return comb
Exemple #13
0
    def unify_capM(self):
        '''
        Return market capitalization as a long (t, sid)-indexed frame.

        Market capitalization is usually used as the weight, taken at
        time t.

        :return: one-column ('capM') DataFrame indexed by (t, sid)
        '''
        capM = read_df('capM', 'M')
        capM = capM.stack().to_frame()
        # After stack() the index is a two-level MultiIndex, so assigning a
        # single ``index.name`` (as the original did) is invalid and was in
        # any case immediately overwritten; set the level names directly.
        capM.columns = ['capM']
        capM.index.names = ['t', 'sid']
        return capM
Exemple #14
0
def floor_price(df, clsPrice=5.0):
    '''
    Drop observations whose monthly close price is below ``clsPrice``.

    Bug fix: the original ignored the ``clsPrice`` parameter and
    hard-coded 5.0 in the filter; the parameter is now honoured
    (default unchanged, so existing callers behave identically).

    :param df: frame/series with a (t, sid)-style MultiIndex, filtered via
        filter_multiIndex against the valid (t, sid) pairs
    :param clsPrice: minimum allowed close price (default 5.0)
    :return: ``df`` restricted to observations with close >= ``clsPrice``
    '''
    stockCloseM = read_df('stockCloseM', 'M')
    stockCloseM.columns = stockCloseM.columns.astype(str)

    valid = stockCloseM[stockCloseM >= clsPrice].stack()
    df = filter_multiIndex(df, valid.index)
    return df
Exemple #15
0
def get_upDown():
    """Compute lagged up/down market-state flags and save to upDown.csv.

    For each window (12/24/36 months) the rolling sum of market returns,
    lagged one month, is mapped to 1 if positive and -1 if negative.

    2. Cooper Michael J., Gutierrez Roberto C., and Hameed Allaudeen,
    "Market States and Momentum."

    :return:
    """
    mkt = read_df('mktRetM', 'M')['mktRetM']
    windows = [12, 24, 36]

    def _state(window):
        s = mkt.rolling(window=window).sum().shift(1)
        s[s > 0] = 1
        s[s < 0] = -1
        return s

    upDown = pd.concat([_state(w) for w in windows], axis=1, keys=windows)
    upDown.to_csv(os.path.join(DATA_PATH, 'upDown.csv'))
Exemple #16
0
def get_momentum():
    """Compute momentum signals from monthly returns and save one CSV per signal.

    Two families of rolling cumulative returns (in percent) are built per
    stock: lagged ones (d_lag) that exclude the most recent month's return,
    and non-lagged ones (d_nonlag) that include it.  Each dict maps a
    signal name to [window, min_periods] for the rolling computation.
    """
    stockRetM = read_df('stockRetM', 'M')
    stk = stockRetM.stack()
    stk.index.names = ['t', 'sid']

    # lagged 1 month: the month-t return itself is excluded by _before
    d_lag = {'mom': [11, 9], 'r12': [12, 10], 'r6': [6, 5]}

    # non-lagged: include the month-t return
    d_nonlag = {'R12M': [12, 10], 'R9M': [9, 7], 'R6M': [6, 5], 'R3M': [3, 3]}

    def _cal_cumulated_return(s):
        # compound the simple returns in s and take the final cumulative value
        return np.cumprod(s + 1)[-1] - 1

    def _before(s, interval, min_periods):
        # lagged variant: drop the last (time-t) element of each window
        return s.rolling(interval, min_periods=min_periods).apply(
            lambda s: _cal_cumulated_return(s[:-1]))

    def _upto(s, interval, min_periods):
        # non-lagged variant: compound over the full window, including time t
        return s.rolling(interval,
                         min_periods=min_periods).apply(_cal_cumulated_return)

    ss = []
    names = []
    for bn, bp in d_lag.items():
        ser = stk.groupby('sid').apply(lambda s: _before(s, bp[0], bp[1]))
        ss.append(ser)
        names.append(bn)

    for un, up in d_nonlag.items():
        ser = stk.groupby('sid').apply(lambda s: _upto(s, up[0], up[1]))
        ss.append(ser)
        names.append(un)

    momentum = pd.concat(ss, axis=1, keys=names)
    momentum = momentum * 100  # express as percent

    # TODO: decide on the storage layout — one stacked file or one file per signal
    for col in momentum.columns:
        momentum[col].unstack().to_csv(os.path.join(DATA_PATH, col + '.csv'))
Exemple #17
0
def get_hxz4M():
    """Assemble the monthly HXZ four-factor table and save it as hxz4M.csv.

    The factor files were produced by get_hxz4Factors() in
    ``zht/researchTopics/assetPricing/calFactors.py`` (python27 install).

    :return:
    """
    direc = r'E:\a\quantDb\researchTopics\assetPricing\hxz4\factor'

    frames = []
    for name in ['rsmb', 'ria', 'rroe']:
        factor = pd.read_csv(os.path.join(direc, name + '.csv'), index_col=0)
        factor.index.name = 't'
        factor.columns = [name]
        frames.append(factor)

    comb = pd.concat(frames, axis=1)
    comb.index = pd.to_datetime(comb.index) + MonthEnd()  # stamp at month end

    comb['rp'] = read_df('ff3M', 'M')['rp']  # risk premium taken from ff3M
    comb.to_csv(os.path.join(DATA_PATH, 'hxz4M.csv'))
Exemple #18
0
 def unify_hxz4M(self):
     """Load and return the monthly hxz4M factor table unchanged."""
     return read_df('hxz4M', 'M')
Exemple #19
0
 def unify_ff5M(self):
     """Load and return the monthly ff5M factor table unchanged."""
     return read_df('ff5M', 'M')
Exemple #20
0
 def unify_capM(self):
     """Return the monthly market-cap table with its date index named 't'."""
     table = read_df('capM', 'M')
     table.index.name = 't'
     return table
Exemple #21
0
 def unify_rpM(self):
     """Load and return the monthly risk-premium table unchanged."""
     return read_df('rpM', 'M')
Exemple #22
0
 def unify_hxz4M(self):
     """Return the monthly hxz4M factors with column names prefixed 'hxz4M'."""
     return _add_prefix(read_df('hxz4M', 'M'), 'hxz4M')
Exemple #23
0
def get_rpD():
    """Extract the daily risk-premium column from ff3D and save it as rpD.csv."""
    read_df('ff3D', 'D')[['rp']].to_csv(os.path.join(DATA_PATH, 'rpD.csv'))
Exemple #24
0
def get_eretD():
    """Save daily excess returns (stock return minus risk-free rate) to eretD.csv."""
    excess = read_df('stockRetD', 'D').sub(read_df('rfD', 'D')['rfD'], axis=0)
    excess.to_csv(os.path.join(DATA_PATH, 'eretD.csv'))
Exemple #25
0
def get_eretM():
    """Save monthly excess returns (stock return minus risk-free rate) to eretM.csv."""
    excess = read_df('stockRetM', 'M').sub(read_df('rfM', 'M')['rfM'], axis=0)
    excess.to_csv(os.path.join(DATA_PATH, 'eretM.csv'))
Exemple #26
0
 def __init__(self):
     """Cache the monthly market-return and three-factor tables."""
     # NOTE(review): sibling code reads 'ff3M'; confirm 'ff3' is the
     # intended file name here and not a typo.
     self.mktRetM = read_df('mktRetM', 'M')
     self.ff3 = read_df('ff3', 'M')
Exemple #27
0
def get_rpM():
    """Extract the monthly risk-premium column from ff3M and save it as rpM.csv."""
    read_df('ff3M', 'M')[['rp']].to_csv(os.path.join(DATA_PATH, 'rpM.csv'))
Exemple #28
0
 def unify_rfM(self):
     """Load and return the monthly risk-free-rate table unchanged."""
     return read_df('rfM', 'M')
Exemple #29
0
def get_liquidity_ps():
    """Estimate Pastor-Stambaugh liquidity betas per stock and save liqBeta.csv.

    Two stages:
    1. Build the aggregate liquidity innovation series ``lm`` as the
       residual of a rolling (60-month) regression of the change in the
       aggregate PS measure on its own lag and the lagged level.
    2. For each stock, roll a 60-month regression of excess returns on
       rp, smb, hml and lm; the coefficient on lm is the liquidity beta.
    """
    df = read_gta('Liq_PSM_M')
    # MarketType == 21: combined A-share and ChiNext (GEM) board sample
    # AggPS_os is weighted by floating market cap; on page 310 Bali uses
    # total market capitalization instead
    condition1 = (df['MarketType'] == 21)
    condition2 = (df['ST'] == 1)  # ST filter — presumably ST==1 marks non-ST rows; TODO confirm against the GTA codebook

    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')

    df.index = freq_end(df.index, 'M')  # normalize timestamps to month end
    df = df.sort_index()
    df['rm_ahead'] = df['rm'].shift(1)          # lagged level
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)             # first difference
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)  # lagged first difference

    #df.groupby(lambda x:x.year).apply(lambda df:df.shape[0])
    #TODO: we don't know the length of window to regress.In this place,we use the five years history
    def regr(df):
        # Residual of delta_rm on its lag and the lagged level; requires
        # more than 30 usable months, else NaN.
        if df.shape[0] > 30:
            return sm.ols(formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                          data=df).fit().resid[0]
        else:
            return np.NaN

    window = 60  # not exactly 5 years of rows
    # expanding-then-truncated window: for each month, regress on the last
    # `window` observations up to that month
    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    # per-stock monthly excess returns, long format indexed by (t, sid)
    ret = read_df('stockRetM', freq='M')
    rf = read_df('rfM', freq='M')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_df('ff3_gta', 'M')
    factors = pd.concat([ff3, lm], axis=1)

    comb = eret.to_frame().join(factors)

    def _for_one_month(df):
        # Liquidity beta for one 60-month window: coefficient on lm in a
        # 4-factor regression; NaN if fewer than 30 usable months.
        if df.shape[0] >= 30:
            return sm.ols(formula='eret ~ rp + smb + hml + lm',
                          data=df).fit().params['lm']
        else:
            return np.NaN

    def _get_result(df):
        # Rolling liquidity betas for one stock; returns None (dropped by
        # groupby) for stocks with too short a history.
        thresh = 30  # minimum history in months
        if df.shape[0] > thresh:
            values = []
            sid = df.index[0][1]
            df = df.reset_index(level='sid', drop=True)
            months = df.index.tolist()[thresh:]
            for month in months:
                subdf = df.loc[:month][-60:]
                subdf = subdf.dropna()
                # df=df.reset_index(level='sid',drop=True).loc[:month].last(window)
                values.append(_for_one_month(subdf))
            print(sid)  # progress indicator
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result)
    result.unstack('sid').to_csv(os.path.join(DATA_PATH, 'liqBeta.csv'))
Exemple #30
0
 def unify_mktRetM(self):
     """Load and return the monthly market-return table unchanged."""
     return read_df('mktRetM', 'M')