def get_amihud_illiq():
    '''
    Build Amihud illiquidity measures from daily GTA data and save them.

    Reads ``TRD_Dalyr``, applies ``_amihud`` through the project helper
    ``groupby_rolling`` for several window labels, appends the natural log of
    every measure under a ``ln_``-prefixed label, reshapes the table and
    stores it with ``save`` under the name ``'illiq'``.

    :return: None
    '''
    df = read_gta('TRD_Dalyr')
    df = df[['Stkcd', 'Trddt', 'Dretwd', 'Dnvaltrd']]
    df.columns = ['sid', 't', 'ret', 'volume']
    df['t'] = freq_end(df['t'], 'D')
    df = df.set_index(['t', 'sid'])
    # TODO: gta's data is not monotonically increasing; add this check to
    # the other scripts as well.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index(level='t')

    # Window label -> integer handed to groupby_rolling (presumably a number
    # of daily observations -- TODO confirm against groupby_rolling).
    # Built from pairs so the order is guaranteed on every Python version,
    # and named so that the builtin ``dict`` is not shadowed.
    windows = OrderedDict([('1M', 15), ('3M', 50), ('6M', 100), ('12M', 200)])
    result = groupby_rolling(df, 'illiq', windows, _amihud)
    result.index.names = ['type', 't']

    # Log-transformed copy of every measure, labelled 'ln_<type>'.
    ln_result = np.log(result)
    ln_result = ln_result.reset_index()
    ln_result['type'] = 'ln_' + ln_result['type'].astype(str)
    ln_result = ln_result.set_index(['type', 't'])
    illiq = pd.concat([result, ln_result], axis=0)
    # TODO: use valid observation for the whole project as page 276

    # Adjust the format of the DataFrame.
    # ``name`` must be passed by keyword: the second positional argument of
    # pd.Index is ``dtype``, so a non-None columns name would be
    # misinterpreted as a dtype.
    illiq.columns = pd.Index(illiq.columns.astype(str),
                             name=illiq.columns.name)
    illiq = illiq.reset_index()
    illiq['t'] = freq_end(illiq['t'], 'M')
    illiq = illiq.set_index(['type', 't'])
    illiq = illiq.stack().unstack(level='type')

    # TODO: The data is really noisy, refer to outliers figures for details
    save(illiq, 'illiq')
def get_stockCloseY():
    '''
    Pivot the ``Yclsprc`` column of ``TRD_Year`` into a wide table
    (rows: ``Trdynt``, columns: ``Stkcd``) and write it to
    ``stockCloseY.csv`` under ``DATA_PATH``.

    :return: None
    '''
    out_name = 'stockCloseY'
    target = os.path.join(DATA_PATH, out_name + '.csv')

    yearly = _readFromSrc('TRD_Year')
    # pivot_table aggregates duplicate (index, column) pairs with its
    # default aggfunc.
    wide = pd.pivot_table(
        yearly,
        values='Yclsprc',
        index='Trdynt',
        columns='Stkcd',
    )
    wide.index = freq_end(wide.index, 'Y')
    wide.to_csv(target)
def get_pu():
    '''
    Download the China policy uncertainty spreadsheet and save the index
    as a series named ``pu`` indexed by ``t``.

    The last row of the sheet is skipped when reading, the three columns are
    renamed to ``year``, ``month`` and ``pu``, and a ``'<year>-<month>'``
    label is converted with ``freq_end(..., 'M')`` to build the index.

    :return: None
    '''
    url = r'http://www.policyuncertainty.com/media/China_Policy_Uncertainty_Data.xlsx'
    # ``skip_footer`` was deprecated in favour of ``skipfooter`` and has been
    # removed from current pandas, where the old spelling raises TypeError.
    pu = pd.read_excel(url, skipfooter=1)
    pu.columns = ['year', 'month', 'pu']
    pu['t'] = pu['year'].map(str) + '-' + pu['month'].map(str)
    pu['t'] = freq_end(pu['t'], 'M')
    pu = pu.set_index('t')
    pu = pu['pu']
    save(pu, 'pu')
def get_ff3D():
    '''
    Extract daily three-factor data from ``STK_MKT_ThrfacDay``, save it as
    ``'ff3D'`` and return it.

    :return: DataFrame indexed by day with columns ``rp``, ``smb``, ``hml``
    '''
    raw = read_gta('STK_MKT_ThrfacDay')

    # Original note: P9709 is the whole A-share market (Shanghai and Shenzhen
    # A shares plus ChiNext); factors weighted by tradable market value.
    # NOTE(review): the filter below uses 'P9707', not the 'P9709' mentioned
    # in the note and used by the monthly factor functions -- confirm which
    # market type is intended.
    in_market = raw['MarkettypeID'] == 'P9707'
    factors = raw.loc[in_market,
                      ['TradingDate', 'RiskPremium1', 'SMB1', 'HML1']]
    factors.columns = ['t', 'rp', 'smb', 'hml']
    factors.columns.name = 'type'

    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'D')

    save(factors, 'ff3D')
    return factors
def get_ffcM():
    '''
    Extract the monthly Carhart four-factor data from
    ``STK_MKT_CarhartFourFactors`` and save it as ``'ffcM'``.

    :return: None
    '''
    # (source column, output column) pairs, in output order.
    column_map = [
        ('TradingMonth', 't'),
        ('RiskPremium1', 'rp'),
        ('SMB1', 'smb'),
        ('HML1', 'hml'),
        ('UMD2', 'mom'),
    ]

    raw = read_gta('STK_MKT_CarhartFourFactors')
    # trick: P9709 is the whole A-share market (Shanghai and Shenzhen A shares
    # plus ChiNext)
    # trick: weighted by tradable market value
    selected = raw[raw['MarkettypeID'] == 'P9709']
    factors = selected[[source for source, _ in column_map]]
    factors.columns = [target for _, target in column_map]
    factors.columns.name = 'type'

    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'M')
    save(factors, 'ffcM')
def get_ff3M():
    '''
    Extract the monthly three-factor data from ``STK_MKT_ThrfacMonth``,
    save it as ``'ff3M'`` and return it.

    :return: DataFrame indexed by month with columns ``rp``, ``smb``, ``hml``
    '''
    # fixme: there are some abnormal values
    table = read_gta('STK_MKT_ThrfacMonth')

    # trick: P9709 is the whole A-share market (Shanghai and Shenzhen A shares
    # plus ChiNext)
    # trick: weighted by tradable market value
    wanted = ['TradingMonth', 'RiskPremium1', 'SMB1', 'HML1']
    is_p9709 = table['MarkettypeID'] == 'P9709'
    ff3 = table[is_p9709][wanted]
    ff3.columns = ['t', 'rp', 'smb', 'hml']

    ff3 = ff3.set_index('t')
    ff3.index = freq_end(ff3.index, 'M')
    ff3.columns.name = 'type'

    save(ff3, 'ff3M')
    return ff3
def get_ff5M():
    '''
    Extract the monthly five-factor data from ``STK_MKT_FivefacMonth`` and
    save it as ``'ff5M'``.

    :return: None
    '''
    source_columns = [
        'TradingMonth', 'RiskPremium1', 'SMB1', 'HML1', 'RMW1', 'CMA1',
    ]
    factor_names = ['t', 'rp', 'smb', 'hml', 'rmw', 'cma']

    raw = read_gta('STK_MKT_FivefacMonth')
    # trick: P9709 is the whole A-share market (Shanghai and Shenzhen A shares
    # plus ChiNext)
    # trick: weighted by tradable market value
    # trick: Portfolios == 1 selects the 2*3 portfolio construction
    market_ok = raw['MarkettypeID'] == 'P9709'
    portfolio_ok = raw['Portfolios'] == 1
    five = raw[market_ok & portfolio_ok][source_columns]

    five.columns = factor_names
    five.columns.name = 'type'
    five = five.set_index('t')
    five.index = freq_end(five.index, 'M')
    save(five, 'ff5M')
def get_mktRetM():
    '''
    Extract the monthly market return series from ``TRD_Cnmont`` and save
    it as ``'mktRetM'``.

    NOTE(review): this file contains a second ``get_mktRetM`` definition
    further down; if both live in the same module the later one replaces
    this one at import time.

    :return: None
    '''
    market = read_gta('TRD_Cnmont')
    # Markettype 21 = composite A shares and ChiNext
    market = market.loc[market['Markettype'] == 21].set_index('Trdmnt')
    market.index = freq_end(market.index, 'M')
    market.index.name = 't'

    # trick: Cmretwdos is the composite market return with cash dividends
    # reinvested (tradable-market-value weighted), per the original note.
    series = market['Cmretwdos'].rename('mktRetM')
    save(series, 'mktRetM')
def get_capM():
    '''
    Get the monthly tradable (circulating) market capitalization per stock
    from ``TRD_Mnth`` and save it as ``'capM'``.

    :return: None
    '''
    # trick: Msmvosd is the monthly tradable market value of each stock,
    # in thousands of yuan.
    # TODO: convert the unit to millions as in Cakici, Chan, and Topyan,
    # "Cross-Sectional Stock Return Predictability in China."
    cap = read_df_from_gta('TRD_Mnth', 'Msmvosd', 'Trdmnt', 'Stkcd')

    # Row axis. The name is set before the index is replaced, as in the
    # original; whether it survives depends on freq_end.
    cap.index.name = 't'
    cap.index = freq_end(cap.index, 'M')

    # Column axis: stock ids as strings, axis named 'sid'.
    sids = cap.columns.astype(str)
    sids.name = 'sid'
    cap.columns = sids

    save(cap, 'capM')
def get_mktRetM():
    '''
    Extract the monthly market return from ``TRD_Cnmont`` and write it to
    ``mktRetM.csv`` under ``DATA_PATH`` as a single column named ``mktRetM``.

    :return: None
    '''
    newName = 'mktRetM'
    path = os.path.join(DATA_PATH, newName + '.csv')
    tbname = 'TRD_Cnmont'
    indVar = 'Trdmnt'
    # Composite market return with cash dividends reinvested
    # (tradable-market-value weighted), per the original note.
    targetVar = 'Cmretwdos'

    df = _readFromSrc(tbname)
    df = df[df['Markettype'] == 21]  # 21 = composite A shares and ChiNext
    df = df.set_index(indVar)
    df = df.sort_index()
    df = df[[targetVar]]
    # Clear the index name by assignment: ``del df.index.name`` raises
    # AttributeError on pandas versions where Index.name is a property
    # without a deleter.
    df.index.name = None
    df.columns = [newName]
    df.index = freq_end(df.index, 'M')
    df.to_csv(path)
def get_ff3M_resset():
    '''
    Extract the monthly three-factor data from the RESSET table
    ``THRFACDAT_MONTHLY`` and save it as ``'ff3M_resset'``.

    :return: None
    '''
    raw = read_resset('THRFACDAT_MONTHLY')

    # Exchflg == 0: all exchanges
    # Mktflg == 'A': A shares only
    all_exchanges = raw['Exchflg'] == 0
    a_shares = raw['Mktflg'] == 'A'
    monthly = raw[all_exchanges & a_shares].set_index('Date')
    monthly.index = freq_end(monthly.index, 'M')
    monthly.index.name = 't'

    # The *_tmv columns are weighted with tradable capitalization.
    factors = monthly[['Rmrf_tmv', 'Smb_tmv', 'Hml_tmv']]
    factors.columns = ['rp', 'smb', 'hml']
    factors.columns.name = 'type'
    save(factors, 'ff3M_resset')
def get_stockRetM():
    '''
    Monthly stock return with dividends, read from ``TRD_Mnth``, saved as
    ``'stockRetM'`` and returned.

    :return: DataFrame with months on the rows and stock ids on the columns
    '''
    # Mretwd: return with cash dividends reinvested.
    returns = read_df_from_gta('TRD_Mnth', 'Mretwd', 'Trdmnt', 'Stkcd')
    # TODO: identify the axis and convert the axis automatically

    # Column axis: named 'sid', labels converted to strings.
    returns.columns.name = 'sid'
    returns.columns = returns.columns.astype(str)

    # Row axis: named 't', then replaced by the freq_end result.
    returns.index.name = 't'
    returns.index = freq_end(returns.index, 'M')

    save(returns, 'stockRetM')
    return returns
def get_amihud_illiq():
    '''
    Build Amihud illiquidity measures from daily GTA data and write them to
    ``illiq.csv`` under ``DATA_PATH``.

    Reads ``TRD_Dalyr``, applies ``_amihud`` through the project helper
    ``groupby_rolling`` for several window labels, and appends the natural
    log of every measure under a ``ln_``-prefixed label.

    :return: None
    '''
    df = read_gta('TRD_Dalyr')
    df = df[['Stkcd', 'Trddt', 'Dretwd', 'Dnvaltrd']]
    df.columns = ['sid', 't', 'ret', 'volume']
    df['t'] = freq_end(df['t'], 'D')
    df = df.set_index(['t', 'sid'])
    # TODO: gta's data is not monotonically increasing; add this check to
    # the other scripts as well.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index(level='t')

    # Window label -> integer handed to groupby_rolling (presumably a number
    # of daily observations -- TODO confirm against groupby_rolling).
    # Built from pairs so the order is guaranteed on every Python version,
    # and named so that the builtin ``dict`` is not shadowed.
    windows = OrderedDict([('1M', 15), ('3M', 50), ('6M', 100), ('12M', 200)])
    result = groupby_rolling(df, 'illiq', windows, _amihud)
    result.index.names = ['type', 't']

    # Log-transformed copy of every measure, labelled 'ln_<type>'.
    ln_result = np.log(result)
    ln_result = ln_result.reset_index()
    ln_result['type'] = 'ln_' + ln_result['type'].astype(str)
    ln_result = ln_result.set_index(['type', 't'])

    illiq = pd.concat([result, ln_result], axis=0)
    # TODO: use valid observation for the whole project as page 276
    illiq.to_csv(os.path.join(DATA_PATH, 'illiq.csv'))
def get_liquidity_ps():
    '''
    Estimate each stock's loading on a liquidity factor built from the
    aggregate ``AggPS_os`` series in ``Liq_PSM_M`` and write the result to
    ``liqBeta.csv`` under ``DATA_PATH``.

    Steps:
      1. Build the monthly aggregate series ``rm`` plus its lag and first
         differences.
      2. For every month, regress ``delta_rm`` on ``delta_rm_ahead`` and
         ``rm_ahead`` over a trailing window and keep one residual as the
         factor ``lm``.
      3. For every stock and month, regress excess return on
         ``rp + smb + hml + lm`` over a trailing window and keep the
         coefficient on ``lm``.

    :return: None
    '''
    min_obs = 30   # observation threshold used by both regressions
    window = 60    # trailing rows per regression; not exactly 5 years

    df = read_gta('Liq_PSM_M')
    # MarketType == 21: composite A shares and ChiNext.
    # Tradable-market-value weighted, but on page 310 Bali uses total market
    # capitalization.
    condition1 = (df['MarketType'] == 21)
    condition2 = (df['ST'] == 1)  # delete the ST stocks
    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')
    df.index = freq_end(df.index, 'M')
    df = df.sort_index()

    df['rm_ahead'] = df['rm'].shift(1)
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)

    # TODO: we don't know the length of window to regress. In this place,
    # we use the five years history.
    def regr(sample):
        # Needs strictly more than ``min_obs`` rows, otherwise NaN.
        if sample.shape[0] > min_obs:
            fitted = sm.ols(
                formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                data=sample).fit()
            # NOTE(review): this keeps element 0 of the residuals, which on a
            # date-indexed sample is the oldest row of the window, not the
            # month being evaluated -- confirm that this is intended.
            return fitted.resid[0]
        # np.nan instead of np.NaN: the alias was removed in NumPy 2.0.
        return np.nan

    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    # Excess returns in long format, indexed by (t, sid).
    ret = read_df('stockRetM', freq='M')
    rf = read_df('rfM', freq='M')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_df('ff3_gta', 'M')
    factors = pd.concat([ff3, lm], axis=1)
    comb = eret.to_frame().join(factors)

    def _for_one_month(sample):
        # Coefficient on ``lm``; needs at least ``min_obs`` rows.
        if sample.shape[0] >= min_obs:
            fitted = sm.ols(
                formula='eret ~ rp + smb + hml + lm', data=sample).fit()
            return fitted.params['lm']
        return np.nan

    def _get_result(group):
        # Stocks with ``min_obs`` rows or fewer are skipped (implicit None).
        if group.shape[0] > min_obs:
            values = []
            sid = group.index[0][1]
            group = group.reset_index(level='sid', drop=True)
            # The first ``min_obs`` months of each stock get no estimate.
            months = group.index.tolist()[min_obs:]
            for month in months:
                subdf = group.loc[:month][-window:]
                subdf = subdf.dropna()
                values.append(_for_one_month(subdf))
            print(sid)  # progress output, one line per processed stock
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result)
    result.unstack('sid').to_csv(os.path.join(DATA_PATH, 'liqBeta.csv'))
def get_liquidity():
    '''
    Combine several monthly liquidity measures from GTA tables into one
    table indexed by ``(t, sid)`` and save it as ``'liquidity'``.

    Measures: four turnover rates (``Liq_Tover_M``), the Amihud measure
    (``Liq_Amihud_M``) and two Pastor-Stambaugh measures (``Liq_PS_M``).

    :return: None
    '''

    def _load_measures(table, source_cols, measure_names):
        # Read one table, keep rows with Status 'A' (A = normal trading)
        # and return the measures as floats indexed by (t, sid).
        raw = read_gta(table, index_col=0)
        raw = raw[raw['Status'] == 'A']
        panel = raw[['Stkcd', 'Trdmnt'] + source_cols]
        panel.columns = ['sid', 't'] + measure_names
        panel['t'] = freq_end(panel['t'], 'M')
        panel['sid'] = panel['sid'].astype(str)
        panel = panel.set_index(['t', 'sid'])
        return panel.astype(float)

    # Turnover rate
    turnover = _load_measures(
        'Liq_Tover_M',
        ['ToverOsM', 'ToverTlM', 'ToverOsMAvg', 'ToverTlMAvg'],
        ['turnover1', 'turnover2', 'turnover3', 'turnover4'],
    )

    # Amihud
    amihud = _load_measures('Liq_Amihud_M', ['ILLIQ_M'], ['amihud'])

    # The Roll (Liq_Roll_M: Roll_M, Roll_Impact_M) and Zeros (Liq_Zeros_M:
    # Zeros_M, Zeros_Impact_M) measures are deliberately left out: roll1,
    # roll2, zeros1 and zeros2 are not proper for portfolio analysis, since
    # there are a lot of zeros in the sample, which will cause errors in the
    # program.

    # Pastor Stambaugh
    pastor_stambaugh = _load_measures(
        'Liq_PS_M', ['PSos', 'PStl'], ['ps1', 'ps2'])

    # Combine them, keeping only the first row for each duplicated
    # (t, sid) label.
    deduplicated = [
        part[~part.index.duplicated()]
        for part in (turnover, amihud, pastor_stambaugh)
    ]
    x = pd.concat(deduplicated, axis=1)
    x.columns.name = 'type'
    save(x, 'liquidity')