def get_portRet_ts():
    """Build month-by-portfolio return tables from ``PMONRET_FF.csv``.

    The raw file is narrowed with the ``Exchflg == 0`` and ``Mktflg == A``
    queries (passed to the project helper ``filterDf``), and then pivoted
    into two tables whose rows are month labels and whose columns are
    ``Sizeflg * 10 + BMflg`` for Sizeflg, BMflg in 1..5 (11, 12, ..., 55).

    ``Pmonret_tmv`` values are stored through ``save_df`` under the name
    ``portRet_rs_tmv`` and ``Pmonret_mc`` values under ``portRet_rs_mc``.
    When a month has no row for a (Sizeflg, BMflg) pair, both tables get
    NaN in that cell.  If several rows match, the first one is used.

    Returns:
        None. The two tables are handed to ``save_df``.
    """
    port = pd.read_csv(r'D:\quantDb\resset\PMONRET_FF.csv')
    port = filterDf(port, ['Exchflg == 0', 'Mktflg == A'])
    # Strip the last three characters of every date string
    # (presumably 'YYYY-MM-DD' -> 'YYYY-MM'; confirm against the file).
    port['Date'] = [d[:-3] for d in port['Date']]
    months = sorted(port['Date'].unique().tolist())

    portRet_rs_tmv = pd.DataFrame()
    portRet_rs_mc = pd.DataFrame()
    for month in months:
        # Slice the month once; the 25 cells below then only scan this
        # small subset instead of the whole table (twice per cell).
        month_rows = port[port['Date'] == month]
        for i in range(1, 6):
            for j in range(1, 6):
                label = i * 10 + j
                cell = month_rows[(month_rows['Sizeflg'] == i)
                                  & (month_rows['BMflg'] == j)]
                if len(cell):
                    portRet_rs_tmv.loc[month, label] = \
                        cell['Pmonret_tmv'].values[0]
                    portRet_rs_mc.loc[month, label] = \
                        cell['Pmonret_mc'].values[0]
                else:
                    # np.nan is the canonical spelling; the np.NaN alias
                    # no longer exists in NumPy 2.0.
                    portRet_rs_tmv.loc[month, label] = np.nan
                    portRet_rs_mc.loc[month, label] = np.nan
        # Progress output; the parenthesised form is valid on both
        # Python 2 and Python 3.
        print(month)

    save_df(portRet_rs_tmv, 'portRet_rs_tmv')
    save_df(portRet_rs_mc, 'portRet_rs_mc')
def _validate_ff5_factorRet():
    """Plot reference five-factor returns against locally computed ones.

    Reads ``STK_MKT_FivefacMonth.csv`` from ``sp``, keeps the rows selected
    by the ``MarkettypeID == P9709`` query (via ``filterDf``) and indexes
    them by ``TradingMonth``.  For each ``Portfolios`` code in 1, 2, 3
    (mapped to the folder/file tags '2x3', '2x2', '2x2x2x2') it pairs:

    * ``SMB1``/``HML1``/``RMW1``/``CMA1`` with the ``smb``/``hml``/``rmw``/
      ``cma`` column of ``<tag>_<factor>.csv`` under ``factorRetPath``;
    * ``RiskPremium1`` with the ``rp`` column of ``rp.csv`` under ``bdp``.

    Each pair is restricted to rows without NaN, cumulatively summed,
    plotted, and saved as ``<factor>.png`` in ``validatePath/<tag>``
    (created if missing).

    Returns:
        None. The only outputs are the PNG files.
    """
    tbname = 'STK_MKT_FivefacMonth'
    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    df = filterDf(df, 'MarkettypeID == P9709')
    df = df.set_index('TradingMonth')
    typeDict = {1: '2x3', 2: '2x2', 3: '2x2x2x2'}
    # (reference column, local factor name) for the four factors whose
    # local series live in '<tag>_<factor>.csv'.
    factors = [('SMB1', 'smb'), ('HML1', 'hml'),
               ('RMW1', 'rmw'), ('CMA1', 'cma')]

    # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
    for k, v in typeDict.items():
        reference = df[df['Portfolios'] == k]

        # Build every comparison frame first so that a missing input file
        # fails before any directory or image is created.
        frames = []
        for refcol, factor in factors:
            frame = reference[refcol].to_frame()
            mine = pd.read_csv(
                os.path.join(factorRetPath, '%s_%s.csv' % (v, factor)),
                index_col=0)
            # Column assignment aligns the local series on the index.
            frame['my' + factor] = mine[factor]
            frames.append((factor, frame))

        rp = reference['RiskPremium1'].to_frame()
        rp['myrp'] = pd.read_csv(os.path.join(bdp, 'rp.csv'),
                                 index_col=0)['rp']
        frames.append(('rp', rp))

        direc = os.path.join(validatePath, '%s' % v)
        if not os.path.exists(direc):
            os.makedirs(direc)

        for factor, frame in frames:
            fig = frame.dropna(axis=0).cumsum().plot().get_figure()
            fig.savefig(os.path.join(direc, factor + '.png'))
def validate_portRet():
    """Plot locally computed portfolio returns against ``PMONRET_FF.csv``.

    The reference file is narrowed with the ``Exchflg == 0`` and
    ``Mktflg == A`` queries (via ``filterDf``) and its ``Pmonret_tmv``
    values are pivoted into a table with one row per month label and one
    column per ``Sizeflg * 10 + BMflg`` (11..55).  Months with any missing
    cell are dropped.  The local table comes from ``get_df('portRet')``;
    its column labels are converted with ``int(float(label))``.

    For each of the 25 portfolios, the local series (``mypr``) and the
    reference series (``pr``, aligned on the local index) are cumulatively
    summed, plotted, and saved as ``<portfolio>.png``.

    Returns:
        None. The only outputs are the PNG files.
    """
    df = pd.read_csv(r'D:\quantDb\resset\PMONRET_FF.csv', index_col=0)
    df = filterDf(df, ['Exchflg == 0', 'Mktflg == A'])

    mypr = get_df('portRet')
    mypr.columns = [int(float(col)) for col in mypr.columns]

    pr = pd.DataFrame()
    for date in sorted(df['Date'].unique().tolist()):
        # Row label: the date string without its last three characters
        # (presumably 'YYYY-MM-DD' -> 'YYYY-MM'; confirm against the file).
        month = date[:-3]
        # Slice the date once so each of the 25 cells scans only this
        # subset rather than the full table.
        date_rows = df[df['Date'] == date]
        for i in range(1, 6):
            for j in range(1, 6):
                cell = date_rows[(date_rows['Sizeflg'] == i)
                                 & (date_rows['BMflg'] == j)]
                if len(cell):
                    pr.loc[month, i * 10 + j] = cell['Pmonret_tmv'].values[0]
                else:
                    # np.nan instead of the np.NaN alias, which no longer
                    # exists in NumPy 2.0.
                    pr.loc[month, i * 10 + j] = np.nan
        # Progress output; valid on both Python 2 and Python 3.
        print(month)

    pr = pr.dropna(axis=0, how='any')

    for i in range(1, 6):
        for j in range(1, 6):
            port = i * 10 + j
            tmp = pd.DataFrame()
            tmp['mypr'] = mypr[port]
            tmp['pr'] = pr[port]
            tmp.cumsum().plot().get_figure().savefig(
                r'D:\quantDb\researchTopics\crossSection\data\observe\portRet\%s.png' % port)
def get_bv():
    """Pivot field ``F091001A`` of table ``FI_T9`` into ``bv.csv``.

    Reads ``FI_T9.csv`` from ``sp``, applies the ``Typrep == A`` and
    ``Accper endswith 12-31`` queries through ``filterDf``, and reshapes
    the result so that rows are ``Accper`` labels (last three characters
    removed) and columns are ``Stkcd`` values.  The table is sorted by row
    label and written to ``bv.csv`` under ``tmpp``.
    """
    name = 'bv'
    tbname = 'FI_T9'
    fldname = 'F091001A'  # net assets per share
    timefld = 'Accper'

    raw = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need annual report
    raw = filterDf(raw, ['Typrep == A', 'Accper endswith 12-31'])
    raw = raw[['Stkcd', timefld, fldname]]

    def as_column(stock_id, rows):
        # One single-column frame per stock: index = period, column = id.
        column = rows[[timefld, fldname]].set_index(timefld)
        column.columns = [stock_id]
        return column

    columns = [as_column(stock_id, rows)
               for stock_id, rows in raw.groupby('Stkcd')]
    table = pd.concat(columns, axis=1)
    table.index = [label[:-3] for label in table.index]
    table = table.sort_index(ascending=True)
    table.to_csv(os.path.join(tmpp, name + '.csv'))
def get_bv():
    """Pivot field ``F091001A`` of table ``FI_T9`` and save it as ``bv``.

    Reads ``FI_T9.csv`` from ``sp``, applies the ``Typrep == A`` and
    ``Accper endswith 12-31`` queries through ``filterDf``, and reshapes
    the result so that rows are ``Accper`` labels (last three characters
    removed) and columns are ``Stkcd`` values.  The sorted table is handed
    to ``save_df`` under the name ``bv``.
    """
    name = 'bv'
    tbname = 'FI_T9'
    fldname = 'F091001A'  # net assets per share
    timefld = 'Accper'

    source = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need annual report
    filters = ['Typrep == A', 'Accper endswith 12-31']
    source = filterDf(source, filters)
    source = source[['Stkcd', timefld, fldname]]

    pieces = []
    for code, group in source.groupby('Stkcd'):
        # Keep the period as the index and label the value column by stock.
        piece = group[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]
        pieces.append(piece)

    wide = pd.concat(pieces, axis=1)
    wide.index = [period[:-3] for period in wide.index]
    wide = wide.sort_index(ascending=True)
    save_df(wide, name)
def get_rf():
    """Compute monthly averages of ``Nrrmtdt`` and save them as ``rf``.

    Reads ``TRD_Nrrate.csv`` from ``sp`` and keeps the rows selected by the
    ``Nrr1 == NRI01`` query (via ``filterDf``).  The ``Nrrmtdt`` values,
    keyed by ``Clsdt``, are spread over every calendar day between the
    first and last ``Clsdt`` with forward filling, averaged per month,
    divided by 100 (presumably percent -> decimal; confirm), and handed to
    ``save_df`` as a one-column table named ``rf`` with an unnamed index of
    'YYYY-MM' labels.

    Raises:
        IndexError: if no rows survive the filter (``df.index[0]``).
    """
    df = pd.read_csv(os.path.join(sp, 'TRD_Nrrate.csv'))
    q = 'Nrr1 == NRI01'  # TODO: TBC = treasury-bond coupon rate
    df = filterDf(df, q)
    df = df.sort_values('Clsdt')
    df = df[['Clsdt', 'Nrrmtdt']]
    df = df.set_index('Clsdt')

    # Daily grid between the first and last quote, as 'YYYY-MM-DD' strings.
    # NOTE(review): the assignment below aligns on these labels, so Clsdt
    # is assumed to be stored in the same 'YYYY-MM-DD' form -- confirm.
    dates = pd.date_range(df.index[0], df.index[-1], freq='D')
    dates = [d.strftime('%Y-%m-%d') for d in dates]
    newdf = pd.DataFrame(index=dates)
    newdf['Nrrmtdt'] = df['Nrrmtdt']
    # .ffill() replaces fillna(method='ffill'), which is deprecated in
    # recent pandas; the result is the same.
    newdf = newdf.ffill()
    newdf = newdf.reset_index()
    newdf['month'] = newdf['index'].apply(
        lambda x: '-'.join(x.split('-')[:-1]))

    # Average only the rate column.  Averaging the whole frame would also
    # hit the string 'index' column, which raises TypeError on pandas 2.x
    # (older versions silently dropped it, giving this same result).
    avg = newdf.groupby('month')[['Nrrmtdt']].mean()
    avg = avg / 100
    # Index.name is a property without a deleter on pandas >= 1.0, so
    # `del avg.index.name` raises AttributeError; assigning None is the
    # portable way to clear it.
    avg.index.name = None
    avg.columns = ['rf']
    save_df(avg, 'rf')
def _validate_ff5_factorRet():
    """Plot reference five-factor returns against locally computed ones.

    Reads ``STK_MKT_FivefacMonth.csv`` from ``sp``, keeps the rows selected
    by the ``MarkettypeID == P9709`` query (via ``filterDf``) and indexes
    them by ``TradingMonth``.  For every ``Portfolios`` code (1 -> '2x3',
    2 -> '2x2', 3 -> '2x2x2x2') five two-column frames are built, each
    holding a reference column and the matching local series:

    ============== ======== ===================================
    reference      local    local file
    ============== ======== ===================================
    SMB1           mysmb    factorRetPath/<tag>_smb.csv ['smb']
    HML1           myhml    factorRetPath/<tag>_hml.csv ['hml']
    RMW1           myrmw    factorRetPath/<tag>_rmw.csv ['rmw']
    CMA1           mycma    factorRetPath/<tag>_cma.csv ['cma']
    RiskPremium1   myrp     bdp/rp.csv ['rp']
    ============== ======== ===================================

    Rows containing NaN are dropped, the remainder is cumulatively summed
    and plotted, and the figure is saved as ``<name>.png`` inside
    ``validatePath/<tag>`` (created if missing).

    Returns:
        None. The only outputs are the PNG files.
    """
    tbname = 'STK_MKT_FivefacMonth'
    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'MarkettypeID == P9709'
    df = filterDf(df, q)
    df = df.set_index('TradingMonth')
    typeDict = {1: '2x3', 2: '2x2', 3: '2x2x2x2'}

    def paired(reference, refcol, csv_path, name):
        # Reference column plus the local series, aligned on the index.
        frame = reference[refcol].to_frame()
        frame['my' + name] = pd.read_csv(csv_path, index_col=0)[name]
        return frame

    # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
    for k, v in typeDict.items():
        reference = df[df['Portfolios'] == k]

        # All inputs are loaded before anything is written, so a missing
        # file fails before a directory or image is created.
        comparisons = []
        for refcol, name in (('SMB1', 'smb'), ('HML1', 'hml'),
                             ('RMW1', 'rmw'), ('CMA1', 'cma')):
            csv_path = os.path.join(factorRetPath, '%s_%s.csv' % (v, name))
            comparisons.append((name, paired(reference, refcol,
                                             csv_path, name)))
        comparisons.append(
            ('rp', paired(reference, 'RiskPremium1',
                          os.path.join(bdp, 'rp.csv'), 'rp')))

        direc = os.path.join(validatePath, '%s' % v)
        if not os.path.exists(direc):
            os.makedirs(direc)

        for name, frame in comparisons:
            frame.dropna(axis=0).cumsum().plot().get_figure().savefig(
                os.path.join(direc, name + '.png'))
def _get_indictor2(name, tbname, fldname, timefld='Trdmnt'):
    """Pivot one field of a per-stock table and save it under ``name``.

    Args:
        name: Name passed to ``save_df`` for the resulting table.
        tbname: Base name (without ``.csv``) of the file to read from ``sp``.
        fldname: Column holding the values to pivot.
        timefld: Column used as the row label of the pivoted table.

    Rows are first restricted with the ``Markettype in [1,4,16]`` query
    (via ``filterDf``).  The output has one row per ``timefld`` value, one
    column per ``Stkcd``, and is sorted by row label.
    """
    raw = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    raw = filterDf(raw, 'Markettype in [1,4,16]')
    raw = raw[['Stkcd', timefld, fldname]]

    def as_column(stock_id, rows):
        # Single-column frame: index = time field, column label = stock id.
        column = rows[[timefld, fldname]].set_index(timefld)
        column.columns = [stock_id]
        return column

    table = pd.concat(
        [as_column(stock_id, rows) for stock_id, rows in raw.groupby('Stkcd')],
        axis=1)
    table = table.sort_index(ascending=True)
    save_df(table, name)
def _get_indicator2(name, tbname, fldname, timefld='Trdmnt'):
    """Pivot one field of a per-stock table and write it to ``<name>.csv``.

    Args:
        name: Base name of the CSV file written under ``tmpp``.
        tbname: Base name (without ``.csv``) of the file to read from ``sp``.
        fldname: Column holding the values to pivot.
        timefld: Column used as the row label of the pivoted table.

    Rows are first restricted with the ``Markettype in [1,4,16]`` query
    (via ``filterDf``).  The output has one row per ``timefld`` value, one
    column per ``Stkcd``, and is sorted by row label.
    """
    source = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    source = filterDf(source, 'Markettype in [1,4,16]')
    source = source[['Stkcd', timefld, fldname]]

    pieces = []
    for code, group in source.groupby('Stkcd'):
        # Index by the time field and label the value column by stock code.
        piece = group[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]
        pieces.append(piece)

    wide = pd.concat(pieces, axis=1).sort_index(ascending=True)
    wide.to_csv(os.path.join(tmpp, name + '.csv'))
def _get_indictor1(name, tbname, fldname, timefld='Accper'):
    """Pivot one field of a report table and save it under ``name``.

    Args:
        name: Name passed to ``save_df`` for the resulting table.
        tbname: Base name (without ``.csv``) of the file to read from ``sp``.
        fldname: Column holding the values to pivot.
        timefld: Column used as the row label of the pivoted table.

    Only rows with ``Typrep == 'A'`` that also pass the
    ``Accper endswith 12-31`` query (via ``filterDf``) are used.  Row
    labels are the ``timefld`` strings with their last three characters
    removed; columns are ``Stkcd`` values; rows are sorted by label.
    """
    raw = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    raw = raw[raw['Typrep'] == 'A']
    raw = filterDf(raw, 'Accper endswith 12-31')
    raw = raw[['Stkcd', timefld, fldname]]

    def as_column(stock_id, rows):
        # Single-column frame whose index is the shortened period label.
        column = rows[[timefld, fldname]].set_index(timefld)
        column.index = [label[:-3] for label in column.index]
        column.columns = [stock_id]
        return column

    table = pd.concat(
        [as_column(stock_id, rows) for stock_id, rows in raw.groupby('Stkcd')],
        axis=1)
    table = table.sort_index(ascending=True)
    save_df(table, name)
def _get_indicator1(name, tbname, fldname, timefld='Accper'):
    """Pivot one field of a report table and write it to ``<name>.csv``.

    Args:
        name: Base name of the CSV file written under ``tmpp``.
        tbname: Base name (without ``.csv``) of the file to read from ``sp``.
        fldname: Column holding the values to pivot.
        timefld: Column used as the row label of the pivoted table.

    Only rows with ``Typrep == 'A'`` that also pass the
    ``Accper endswith 12-31`` query (via ``filterDf``) are used.  Row
    labels are the ``timefld`` strings with their last three characters
    removed; columns are ``Stkcd`` values; rows are sorted by label.
    """
    source = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    source = source[source['Typrep'] == 'A']
    source = filterDf(source, 'Accper endswith 12-31')
    source = source[['Stkcd', timefld, fldname]]

    pieces = []
    for code, group in source.groupby('Stkcd'):
        piece = group[[timefld, fldname]].set_index(timefld)
        # Shorten each period label by three characters before merging.
        piece.index = [period[:-3] for period in piece.index]
        piece.columns = [code]
        pieces.append(piece)

    wide = pd.concat(pieces, axis=1).sort_index(ascending=True)
    wide.to_csv(os.path.join(tmpp, name + '.csv'))
def get_rm():
    """Extract the ``Cmretwdos`` series of ``TRD_Cnmont`` and save it as ``rm``.

    Reads ``TRD_Cnmont.csv`` from ``sp``, keeps the rows selected by the
    ``Markettype == 5`` query (via ``filterDf``), and builds a one-column
    table named ``rm`` indexed by ``Trdmnt`` in ascending order with an
    unnamed index.  The table is handed to ``save_df`` under the name
    ``rm``.
    """
    tbname = 'TRD_Cnmont'
    fldname = 'Cmretwdos'
    timefld = 'Trdmnt'

    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'Markettype == 5'  # composite A-share market
    df = filterDf(df, q)
    df = df[[timefld, fldname]]
    df = df.set_index(timefld)
    df = df.sort_index()
    # Index.name is a property without a deleter on pandas >= 1.0, so
    # `del df.index.name` raises AttributeError; assigning None clears the
    # name on every pandas version.
    df.index.name = None
    df.columns = ['rm']
    save_df(df, 'rm')
def get_rm():
    """Extract the ``Cmretwdos`` series of ``TRD_Cnmont`` into ``rm.csv``.

    Reads ``TRD_Cnmont.csv`` from ``sp``, keeps the rows selected by the
    ``Markettype == 5`` query (via ``filterDf``), and builds a one-column
    table named ``rm`` indexed by ``Trdmnt`` in ascending order with an
    unnamed index.  The table is written to ``rm.csv`` under ``tmpp``.
    """
    tbname = 'TRD_Cnmont'
    fldname = 'Cmretwdos'
    timefld = 'Trdmnt'

    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'Markettype == 5'  # composite A-share market
    df = filterDf(df, q)
    df = df[[timefld, fldname]]
    df = df.set_index(timefld)
    df = df.sort_index()
    # Index.name is a property without a deleter on pandas >= 1.0, so
    # `del df.index.name` raises AttributeError; assigning None clears the
    # name on every pandas version.
    df.index.name = None
    df.columns = ['rm']
    df.to_csv(os.path.join(tmpp, 'rm.csv'))
def get_mv():
    """Pivot field ``Mclsprc`` of table ``TRD_Mnth`` and save it as ``mv``.

    Reads ``TRD_Mnth.csv`` from ``sp``, applies the
    ``Markettype in [1,4,16]`` and ``Trdmnt endswith 12`` queries through
    ``filterDf``, and reshapes the result so that rows are ``Trdmnt``
    values and columns are ``Stkcd`` values.  The table is sorted by row
    label and handed to ``save_df`` under the name ``mv``.
    """
    name = 'mv'
    tbname = 'TRD_Mnth'
    fldname = 'Mclsprc'  # monthly closing price
    timefld = 'Trdmnt'

    raw = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need the data in December
    raw = filterDf(raw, ['Markettype in [1,4,16]', 'Trdmnt endswith 12'])
    raw = raw[['Stkcd', timefld, fldname]]

    def as_column(stock_id, rows):
        # Single-column frame: index = trading month, column = stock id.
        column = rows[[timefld, fldname]].set_index(timefld)
        column.columns = [stock_id]
        return column

    table = pd.concat(
        [as_column(stock_id, rows) for stock_id, rows in raw.groupby('Stkcd')],
        axis=1)
    table = table.sort_index(ascending=True)
    save_df(table, name)
def get_mv():
    """Pivot field ``Mclsprc`` of table ``TRD_Mnth`` into ``mv.csv``.

    Reads ``TRD_Mnth.csv`` from ``sp``, applies the
    ``Markettype in [1,4,16]`` and ``Trdmnt endswith 12`` queries through
    ``filterDf``, and reshapes the result so that rows are ``Trdmnt``
    values and columns are ``Stkcd`` values.  The table is sorted by row
    label and written to ``mv.csv`` under ``tmpp``.
    """
    name = 'mv'
    tbname = 'TRD_Mnth'
    fldname = 'Mclsprc'  # monthly closing price
    timefld = 'Trdmnt'

    source = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need the data in December
    filters = ['Markettype in [1,4,16]', 'Trdmnt endswith 12']
    source = filterDf(source, filters)
    source = source[['Stkcd', timefld, fldname]]

    pieces = []
    for code, group in source.groupby('Stkcd'):
        # Index by trading month and label the value column by stock code.
        piece = group[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]
        pieces.append(piece)

    wide = pd.concat(pieces, axis=1).sort_index(ascending=True)
    wide.to_csv(os.path.join(tmpp, name + '.csv'))
def get_rf():
    """Compute monthly averages of ``Nrrmtdt`` and write them to ``rf.csv``.

    Reads ``TRD_Nrrate.csv`` from ``sp`` and keeps the rows selected by the
    ``Nrr1 == NRI01`` query (via ``filterDf``).  The ``Nrrmtdt`` values,
    keyed by ``Clsdt``, are spread over every calendar day between the
    first and last ``Clsdt`` with forward filling, averaged per month,
    divided by 100 (presumably percent -> decimal; confirm), and written
    to ``rf.csv`` under ``tmpp`` as a one-column table named ``rf`` with an
    unnamed index of 'YYYY-MM' labels.

    Raises:
        IndexError: if no rows survive the filter (``df.index[0]``).
    """
    df = pd.read_csv(os.path.join(sp, 'TRD_Nrrate.csv'))
    q = 'Nrr1 == NRI01'  # TODO: TBC = treasury-bond coupon rate
    df = filterDf(df, q)
    df = df.sort_values('Clsdt')
    df = df[['Clsdt', 'Nrrmtdt']]
    df = df.set_index('Clsdt')

    # Daily grid between the first and last quote, as 'YYYY-MM-DD' strings.
    # NOTE(review): the assignment below aligns on these labels, so Clsdt
    # is assumed to be stored in the same 'YYYY-MM-DD' form -- confirm.
    dates = pd.date_range(df.index[0], df.index[-1], freq='D')
    dates = [d.strftime('%Y-%m-%d') for d in dates]
    newdf = pd.DataFrame(index=dates)
    newdf['Nrrmtdt'] = df['Nrrmtdt']
    # .ffill() replaces fillna(method='ffill'), which is deprecated in
    # recent pandas; the result is the same.
    newdf = newdf.ffill()
    newdf = newdf.reset_index()
    newdf['month'] = newdf['index'].apply(
        lambda x: '-'.join(x.split('-')[:-1]))

    # Average only the rate column.  Averaging the whole frame would also
    # hit the string 'index' column, which raises TypeError on pandas 2.x
    # (older versions silently dropped it, giving this same result).
    avg = newdf.groupby('month')[['Nrrmtdt']].mean()
    avg = avg / 100
    # Index.name is a property without a deleter on pandas >= 1.0, so
    # `del avg.index.name` raises AttributeError; assigning None is the
    # portable way to clear it.
    avg.index.name = None
    avg.columns = ['rf']
    avg.to_csv(os.path.join(tmpp, 'rf.csv'))