Exemple #1
0
def get_bm():
    """Compute book-to-market (bm) and log(bm) and save both to CSV.

    Book equity (bps) is taken only at fiscal year end (December) and
    divided by market equity.  The ratio is shifted forward six months
    (so accounting data are public before they are used), re-indexed on
    a monthly grid, and forward-filled for up to 11 months.

    Note: a handy bookmarked example of date-index manipulation in
    pandas.  be and me here are per-share values, unlike the book which
    uses all floating shares; the ratio bm is unaffected.
    """
    book = read_df('bps', 'M')
    book = book[book.index.month == 12]  # keep December (fiscal-year-end) rows
    market = read_df('stockCloseY', 'M')
    book, market = get_inter_frame([book, market])

    ratio = book / market
    ratio = ratio.where(ratio > 0)      # drop samples with bm <= 0
    ratio = ratio.shift(1, freq='6M')   # 6-month publication lag

    monthly = pd.date_range(ratio.index[0], ratio.index[-1], freq='M')
    ratio = ratio.reindex(index=monthly)
    ratio = ratio.fillna(method='ffill', limit=11)
    ratio.to_csv(os.path.join(DATA_PATH, 'bm.csv'))

    np.log(ratio).to_csv(os.path.join(DATA_PATH, 'logbm.csv'))
Exemple #2
0
def get_eretD():
    """Write daily excess returns (stock return minus risk-free rate) to eretD.csv."""
    ret = read_df('stockRetD', 'D')
    rf = read_df('rfD', 'D')
    # stockRetD is on business days while rfD is on calendar days; dropping
    # all-NaN rows restores the business-day index after the subtraction.
    excess = ret.sub(rf['rfD'], axis=0).dropna(axis=0, how='all')
    excess.to_csv(os.path.join(DATA_PATH, 'eretD.csv'))
Exemple #3
0
 def unify_value(self):
     """Stack bm and logbm into one long-format frame indexed by (t, sid)."""
     names = ['bm', 'logbm']
     stacked = []
     for name in names:
         ser = read_df(name, 'M').stack()  # wide (t x sid) -> long series
         ser.index.names = ['t', 'sid']
         stacked.append(ser)
     return pd.concat(stacked, axis=1, keys=names)
Exemple #4
0
def compare_wind_gta_bps():
    """Align bps from the wind and gta sources for comparison.

    NOTE: the two sources disagree substantially!

    :return:
    """
    wind = read_df('bps_wind', 'M')
    gta = read_df('bps', 'M')
    # wind columns appear to carry a 3-char exchange suffix and leading
    # zeros; normalize them to plain integer strings to match gta's sids.
    wind.columns = [str(int(c[:-3])) for c in wind.columns]
    wind = wind.sort_index(axis=1)
    gta = gta.sort_index(axis=1)
    wind, gta = get_inter_frame([wind, gta])
Exemple #5
0
 def unify_size(self):
     """Stack the three size measures into one long (t, sid)-style frame."""
     measures = []
     for name in ['size', 'mktCap_ff', 'size_ff']:
         # each file is wide: index t, columns sid
         ser = read_df(name, 'M').stack()
         ser.name = name
         measures.append(ser)
     return pd.concat(measures, axis=1)
Exemple #6
0
def get_momentum():
    """Build momentum signals and save them (in percent) to momentum.csv.

    Lagged specs (d_lag) skip the most recent month: the window includes
    time t but the return at t itself is not used, hence window 12 rather
    than 11 for an 11-month lookback.  Non-lagged specs use returns up to
    and including t.
    """
    returns = read_df('stockRetM', 'M').stack()
    returns.index.names = ['t', 'sid']

    # name -> (window, min_periods); lagged specs exclude the month-t return
    lagged = {'mom': (12, 9), 'r12': (13, 10), 'r6': (7, 5)}
    unlagged = {'R12M': (12, 10), 'R9M': (9, 7), 'R6M': (6, 5), 'R3M': (3, 3)}

    pieces, labels = [], []
    for name, (window, minp) in lagged.items():
        pieces.append(returns.groupby('sid').apply(
            lambda s, w=window, m=minp: _before(s, w, m)))
        labels.append(name)
    for name, (window, minp) in unlagged.items():
        pieces.append(returns.groupby('sid').apply(
            lambda s, w=window, m=minp: _upto(s, w, m)))
        labels.append(name)

    momentum = pd.concat(pieces, axis=1, keys=labels) * 100  # to percent
    momentum.to_csv(os.path.join(DATA_PATH, 'momentum.csv'))
Exemple #7
0
def get_rev():
    """Save monthly returns (in percent) as the short-term reversal signal."""
    reversal = (read_df('stockRetM', 'M') * 100).stack().to_frame()
    reversal.columns = ['reversal']
    reversal.index.names = ['t', 'sid']
    reversal.to_csv(os.path.join(DATA_PATH, 'reversal.csv'))
Exemple #8
0
 def unify_eretM(self):
     """Return monthly excess returns as a long frame indexed by (t, sid)."""
     stacked = read_df('eretM', 'M').stack().to_frame()
     stacked.columns = ['eretM']
     stacked.index.names = ['t', 'sid']
     return stacked
Exemple #9
0
 def unify_size(self):
     """Combine the four size measures into one (t, sid)-indexed frame."""
     # file name -> output column name (capM is reported as mktCap)
     spec = [('capM', 'mktCap'), ('size', 'size'),
             ('mktCap_ff', 'mktCap_ff'), ('size_ff', 'size_ff')]
     series = []
     for fname, colname in spec:
         # each file is wide: index t, columns sid
         ser = read_df(fname, 'M').stack()
         ser.name = colname
         series.append(ser)
     comb = pd.concat(series, axis=1)
     comb.index.names = ['t', 'sid']
     return comb
Exemple #10
0
def cal_market_states():
    """Placeholder for the market-state computation (not implemented yet).

    market states: search for 'market state' in zoter
    1. Cheema and Nartea, "Momentum Returns, Market States, and Market
       Dynamics.", chapter 3.1: Following Chui et al. (2010), we set
       stocks with monthly returns greater (lower) than 100 (-95) percent
       equal to 100 (-95) percent to avoid the influence of extreme
       returns and any possible data recording errors.

    :return:
    """
    upDown = read_df('upDown', 'M')  # loaded but not yet used
Exemple #11
0
    def _combine_all_data(self):
        """Assemble one long (t, sid)-indexed panel of eret plus all indicators.

        All indicators are shifted forward one month, so at index t+1 a row
        holds indicator values from time t together with the excess return
        realised at t+1: portfolios are sorted on time-t indicators and
        evaluated on time-(t+1) returns.  Shifting the indicators forward
        (rather than shifting eret back) keeps eret aligned with the month
        it was actually earned, which the regression of portfolio eret on
        mktRetM in _alpha (template.py) relies on.
        """
        ret = read_df('stockRetM', freq='M')
        rf = read_df('rfM', freq='M')
        eret = ret.sub(rf['rf'], axis=0).stack()
        eret.name = 'eret'
        #TODO: create a df to store eret

        indicators = [ind for values in self.information.values()
                      for ind in values]

        frames = [eret]
        for ind in indicators:
            frames.append(read_df(ind, 'M').shift(1).stack())
        data = pd.concat(frames, axis=1, keys=['eret'] + indicators)
        data.index.names = ['t', 'sid']

        # join the single-indexed market return onto the MultiIndex panel
        mktRetM = read_df('mktRetM', freq='M')
        mktRetM.index.name = 't'
        data = data.join(mktRetM)

        # truncate the sample to 1996 onward
        return data[data.index.get_level_values('t').year >= 1996]
Exemple #12
0
    def unify_liquidity(self):
        """Combine the illiquidity measures with the liquidity beta.

        Returns a (t, sid)-indexed frame with one column per illiquidity
        type plus 'liqBeta'.

        Bug fix: the original chained ``.head()`` after the unstack and
        after the concat, truncating the result to 5 rows — leftover
        debugging calls, removed here.
        """
        illiq = pd.read_csv(os.path.join(DATA_PATH, 'illiq.csv'),
                            index_col=[0, 1],
                            parse_dates=True)
        illiq = illiq.stack().unstack('type')
        illiq.index.names = ['t', 'sid']

        liqBeta = read_df('liqBeta', 'M')
        liqBeta = liqBeta.stack()
        liqBeta.index.names = ['t', 'sid']
        liqBeta.name = 'liqBeta'

        comb = pd.concat([illiq, liqBeta], axis=1)
        return comb
Exemple #13
0
    def unify_capM(self):
        '''
        Return market capitalization as a long (t, sid)-indexed frame.

        Market capitalization is usually used as the weight, taken at
        time t.

        :return: one-column ('capM') DataFrame indexed by (t, sid)
        '''
        capM = read_df('capM', 'M')
        capM = capM.stack().to_frame()
        # After stack() the index is a two-level MultiIndex, so assigning a
        # single ``index.name`` (as the original did) is invalid and was in
        # any case immediately overwritten; set the level names directly.
        capM.columns = ['capM']
        capM.index.names = ['t', 'sid']
        return capM
Exemple #14
0
def floor_price(df, clsPrice=5.0):
    '''
    Drop observations whose monthly close price is below ``clsPrice``.

    Bug fix: the original ignored the ``clsPrice`` parameter and
    hard-coded 5.0 in the filter; the parameter is now honoured
    (default unchanged, so existing callers behave identically).

    :param df: frame/series with a (t, sid)-style MultiIndex, filtered via
        filter_multiIndex against the valid (t, sid) pairs
    :param clsPrice: minimum allowed close price (default 5.0)
    :return: ``df`` restricted to observations with close >= ``clsPrice``
    '''
    stockCloseM = read_df('stockCloseM', 'M')
    stockCloseM.columns = stockCloseM.columns.astype(str)

    valid = stockCloseM[stockCloseM >= clsPrice].stack()
    df = filter_multiIndex(df, valid.index)
    return df
Exemple #15
0
def get_upDown():
    """Compute lagged up/down market-state flags and save to upDown.csv.

    For each window (12/24/36 months) the rolling sum of market returns,
    lagged one month, is mapped to 1 if positive and -1 if negative.

    2. Cooper Michael J., Gutierrez Roberto C., and Hameed Allaudeen,
    "Market States and Momentum."

    :return:
    """
    mkt = read_df('mktRetM', 'M')['mktRetM']
    windows = [12, 24, 36]

    def _state(window):
        s = mkt.rolling(window=window).sum().shift(1)
        s[s > 0] = 1
        s[s < 0] = -1
        return s

    upDown = pd.concat([_state(w) for w in windows], axis=1, keys=windows)
    upDown.to_csv(os.path.join(DATA_PATH, 'upDown.csv'))
Exemple #16
0
def get_momentum():
    """Compute momentum signals from monthly returns and save one CSV per signal.

    Two families of rolling cumulative returns (in percent) are built per
    stock: lagged ones (d_lag) that exclude the most recent month's return,
    and non-lagged ones (d_nonlag) that include it.  Each dict maps a
    signal name to [window, min_periods] for the rolling computation.
    """
    stockRetM = read_df('stockRetM', 'M')
    stk = stockRetM.stack()
    stk.index.names = ['t', 'sid']

    # lagged 1 month: the month-t return itself is excluded by _before
    d_lag = {'mom': [11, 9], 'r12': [12, 10], 'r6': [6, 5]}

    # non-lagged: include the month-t return
    d_nonlag = {'R12M': [12, 10], 'R9M': [9, 7], 'R6M': [6, 5], 'R3M': [3, 3]}

    def _cal_cumulated_return(s):
        # compound the simple returns in s and take the final cumulative value
        return np.cumprod(s + 1)[-1] - 1

    def _before(s, interval, min_periods):
        # lagged variant: drop the last (time-t) element of each window
        return s.rolling(interval, min_periods=min_periods).apply(
            lambda s: _cal_cumulated_return(s[:-1]))

    def _upto(s, interval, min_periods):
        # non-lagged variant: compound over the full window, including time t
        return s.rolling(interval,
                         min_periods=min_periods).apply(_cal_cumulated_return)

    ss = []
    names = []
    for bn, bp in d_lag.items():
        ser = stk.groupby('sid').apply(lambda s: _before(s, bp[0], bp[1]))
        ss.append(ser)
        names.append(bn)

    for un, up in d_nonlag.items():
        ser = stk.groupby('sid').apply(lambda s: _upto(s, up[0], up[1]))
        ss.append(ser)
        names.append(un)

    momentum = pd.concat(ss, axis=1, keys=names)
    momentum = momentum * 100  # express as percent

    # TODO: decide on the storage layout — one stacked file or one file per signal
    for col in momentum.columns:
        momentum[col].unstack().to_csv(os.path.join(DATA_PATH, col + '.csv'))
Exemple #17
0
def get_hxz4M():
    """Assemble the monthly HXZ four-factor table and save it as hxz4M.csv.

    The factor files were produced by get_hxz4Factors() in
    ``zht/researchTopics/assetPricing/calFactors.py`` (python27 install).

    :return:
    """
    direc = r'E:\a\quantDb\researchTopics\assetPricing\hxz4\factor'

    frames = []
    for name in ['rsmb', 'ria', 'rroe']:
        factor = pd.read_csv(os.path.join(direc, name + '.csv'), index_col=0)
        factor.index.name = 't'
        factor.columns = [name]
        frames.append(factor)

    comb = pd.concat(frames, axis=1)
    comb.index = pd.to_datetime(comb.index) + MonthEnd()  # stamp at month end

    comb['rp'] = read_df('ff3M', 'M')['rp']  # risk premium taken from ff3M
    comb.to_csv(os.path.join(DATA_PATH, 'hxz4M.csv'))
Exemple #18
0
 def unify_hxz4M(self):
     """Load and return the monthly hxz4M factor table unchanged."""
     return read_df('hxz4M', 'M')
Exemple #19
0
 def unify_ff5M(self):
     """Load and return the monthly ff5M factor table unchanged."""
     return read_df('ff5M', 'M')
Exemple #20
0
 def unify_capM(self):
     """Return the monthly market-cap table with its date index named 't'."""
     table = read_df('capM', 'M')
     table.index.name = 't'
     return table
Exemple #21
0
 def unify_rpM(self):
     """Load and return the monthly risk-premium table unchanged."""
     return read_df('rpM', 'M')
Exemple #22
0
 def unify_hxz4M(self):
     """Return the monthly hxz4M factors with column names prefixed 'hxz4M'."""
     return _add_prefix(read_df('hxz4M', 'M'), 'hxz4M')
Exemple #23
0
def get_rpD():
    """Extract the daily risk-premium column from ff3D and save it as rpD.csv."""
    read_df('ff3D', 'D')[['rp']].to_csv(os.path.join(DATA_PATH, 'rpD.csv'))
Exemple #24
0
def get_eretD():
    """Save daily excess returns (stock return minus risk-free rate) to eretD.csv."""
    excess = read_df('stockRetD', 'D').sub(read_df('rfD', 'D')['rfD'], axis=0)
    excess.to_csv(os.path.join(DATA_PATH, 'eretD.csv'))
Exemple #25
0
def get_eretM():
    """Save monthly excess returns (stock return minus risk-free rate) to eretM.csv."""
    excess = read_df('stockRetM', 'M').sub(read_df('rfM', 'M')['rfM'], axis=0)
    excess.to_csv(os.path.join(DATA_PATH, 'eretM.csv'))
Exemple #26
0
 def __init__(self):
     """Cache the monthly market-return and three-factor tables."""
     # NOTE(review): sibling code reads 'ff3M'; confirm 'ff3' is the
     # intended file name here and not a typo.
     self.mktRetM = read_df('mktRetM', 'M')
     self.ff3 = read_df('ff3', 'M')
Exemple #27
0
def get_rpM():
    """Extract the monthly risk-premium column from ff3M and save it as rpM.csv."""
    read_df('ff3M', 'M')[['rp']].to_csv(os.path.join(DATA_PATH, 'rpM.csv'))
Exemple #28
0
 def unify_rfM(self):
     """Load and return the monthly risk-free-rate table unchanged."""
     return read_df('rfM', 'M')
Exemple #29
0
def get_liquidity_ps():
    """Estimate Pastor-Stambaugh liquidity betas per stock and save liqBeta.csv.

    Two stages:
    1. Build the aggregate liquidity innovation series ``lm`` as the
       residual of a rolling (60-month) regression of the change in the
       aggregate PS measure on its own lag and the lagged level.
    2. For each stock, roll a 60-month regression of excess returns on
       rp, smb, hml and lm; the coefficient on lm is the liquidity beta.
    """
    df = read_gta('Liq_PSM_M')
    # MarketType == 21: combined A-share and ChiNext (GEM) board sample
    # AggPS_os is weighted by floating market cap; on page 310 Bali uses
    # total market capitalization instead
    condition1 = (df['MarketType'] == 21)
    condition2 = (df['ST'] == 1)  # ST filter — presumably ST==1 marks non-ST rows; TODO confirm against the GTA codebook

    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')

    df.index = freq_end(df.index, 'M')  # normalize timestamps to month end
    df = df.sort_index()
    df['rm_ahead'] = df['rm'].shift(1)          # lagged level
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)             # first difference
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)  # lagged first difference

    #df.groupby(lambda x:x.year).apply(lambda df:df.shape[0])
    #TODO: we don't know the length of window to regress.In this place,we use the five years history
    def regr(df):
        # Residual of delta_rm on its lag and the lagged level; requires
        # more than 30 usable months, else NaN.
        if df.shape[0] > 30:
            return sm.ols(formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                          data=df).fit().resid[0]
        else:
            return np.NaN

    window = 60  # not exactly 5 years of rows
    # expanding-then-truncated window: for each month, regress on the last
    # `window` observations up to that month
    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    # per-stock monthly excess returns, long format indexed by (t, sid)
    ret = read_df('stockRetM', freq='M')
    rf = read_df('rfM', freq='M')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_df('ff3_gta', 'M')
    factors = pd.concat([ff3, lm], axis=1)

    comb = eret.to_frame().join(factors)

    def _for_one_month(df):
        # Liquidity beta for one 60-month window: coefficient on lm in a
        # 4-factor regression; NaN if fewer than 30 usable months.
        if df.shape[0] >= 30:
            return sm.ols(formula='eret ~ rp + smb + hml + lm',
                          data=df).fit().params['lm']
        else:
            return np.NaN

    def _get_result(df):
        # Rolling liquidity betas for one stock; returns None (dropped by
        # groupby) for stocks with too short a history.
        thresh = 30  # minimum history in months
        if df.shape[0] > thresh:
            values = []
            sid = df.index[0][1]
            df = df.reset_index(level='sid', drop=True)
            months = df.index.tolist()[thresh:]
            for month in months:
                subdf = df.loc[:month][-60:]
                subdf = subdf.dropna()
                # df=df.reset_index(level='sid',drop=True).loc[:month].last(window)
                values.append(_for_one_month(subdf))
            print(sid)  # progress indicator
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result)
    result.unstack('sid').to_csv(os.path.join(DATA_PATH, 'liqBeta.csv'))
Exemple #30
0
 def unify_mktRetM(self):
     """Load and return the monthly market-return table unchanged."""
     return read_df('mktRetM', 'M')