def __init__(self):
    """Load the inputs for a two-factor run.

    ``para`` is not a parameter here; it is resolved from the enclosing
    (module) scope.

    Attributes set:
        tradingDateList: result of ``getTradingDateFromJY`` for
            ``para.startDate`` .. ``para.endDate`` with ``Period='M'``.
        Factor2: CSV ``<para.result_path>_<para.factor2>.csv`` read with the
            first column as index.
        Price, LimitStatus, Status, listDateNum, Industry, Size: the six
            values returned by ``basic_data(para)``, in that order.
        Factor: ``para.factor`` data for the date window after
            ``stock_dif``, re-indexed with a copy of ``LimitStatus.index``.
    """
    first_day, last_day = para.startDate, para.endDate

    self.tradingDateList = getTradingDateFromJY(
        first_day, last_day, ifTrade=True, Period='M')

    # Primary factor, restricted to the configured date window.
    primary_source = loadData(para=para.factor)
    primary_window = primary_source.BasicDailyFactorAlpha.loc[first_day:last_day, :]

    # Secondary factor comes from a previously written result file.
    secondary_csv = para.result_path + '_' + para.factor2 + '.csv'
    self.Factor2 = pd.read_csv(secondary_csv, index_col=0)

    market_frames = basic_data(para)
    (self.Price, self.LimitStatus, self.Status,
     self.listDateNum, self.Industry, self.Size) = market_frames

    # The factor takes over the limit-status frame's row labels.
    aligned = stock_dif(primary_window, self.LimitStatus)
    aligned.index = self.LimitStatus.index.copy()
    self.Factor = aligned
def __init__(self, para):
    """Load the inputs for a single-factor run.

    Args:
        para: configuration object; ``startDate``, ``endDate`` and ``factor``
            are read here, and the whole object is passed to ``basic_data``.

    Attributes set:
        tradingDateList: trading dates at monthly frequency, as returned by
            ``getTradingDateFromJY(..., Period='M')``.
        Price, LimitStatus, Status, listDateNum, Industry, Size: the six
            values returned by ``basic_data(para)``, in that order.
        Factor: ``para.factor`` data for the date window after ``stock_dif``.
    """
    first_day, last_day = para.startDate, para.endDate

    # Trading date list at monthly frequency.
    self.tradingDateList = getTradingDateFromJY(
        first_day, last_day, ifTrade=True, Period='M')

    factor_source = loadData(para.factor)
    factor_window = factor_source.BasicDailyFactorAlpha.loc[first_day:last_day, :]

    market_frames = basic_data(para)
    (self.Price, self.LimitStatus, self.Status,
     self.listDateNum, self.Industry, self.Size) = market_frames

    self.Factor = stock_dif(factor_window, self.LimitStatus)
def _binary_signal(values, limit_status, is_hit, is_miss):
    """Pass ``values`` through ``stock_dif`` and turn the result into a 1/0 signal.

    Both masks are evaluated *before* anything is overwritten, so cells that
    were just set to 1 can never be re-classified by the second assignment.
    Cells matched by neither predicate (NaN compares False either way) keep
    their value.

    Args:
        values: frame to convert; its index is re-applied to the result.
        limit_status: second argument handed to ``stock_dif``.
        is_hit: callable returning the boolean mask of cells that become 1.
        is_miss: callable returning the boolean mask of cells that become 0.

    Returns:
        The object returned by ``stock_dif``, modified in place.
    """
    source_index = values.index
    flagged = stock_dif(values, limit_status)
    hit = is_hit(flagged)
    miss = is_miss(flagged)
    flagged[hit] = 1
    flagged[miss] = 0
    flagged.index = source_index
    return flagged


def data_initial_F(para):
    """Build the nine binary (1/0) component signals of the F-score.

    Args:
        para: configuration object; ``startDate``, ``endDate`` and
            ``data_path`` are read.

    Returns:
        A 9-tuple, in this order:
        ``ROA_`` (1 where ROA > 0), ``CFO_`` (1 where CFO > 0),
        ``GROAQ_`` (1 where GROAQ > 0), ``Accrual_`` (1 where CFO - ROA < 0),
        ``delta_BLEV_`` (1 where the BLEV difference <= 0),
        ``delta_CurrentRatio_`` (1 where the CurrentRatio difference > 0),
        ``EG_OFFER`` (table read from ``EG_OFFER.csv``, re-labelled and
        transposed), ``delta_GrossProfitMargin_`` (1 where the difference
        > 0), ``delta_AssetsTurn_`` (1 where the difference > 0).
    """
    start, end = para.startDate, para.endDate

    # NOTE(review): the start of this list is hard-coded to 20091231 rather
    # than para.startDate; it is only used to label the EG_OFFER columns.
    tradingDateList = getTradingDateFromJY(20091231, para.endDate, ifTrade=True, Period='M')

    # Price-limit status: 1 = limit-up, -1 = limit-down, 0 = neither.
    # Raw string: the backslashes are path separators, not escape sequences.
    UpDownLimitStatus = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareEODPrices\BasicDailyFactor_UpDownLimitStatus.h5')
    LimitStatus = UpDownLimitStatus.loc[start:end, :]

    # ---- Profitability factors -------------------------------------------
    # ROA: 1 when positive.
    ROA = loadData(para='ROA')
    ROA_ = _binary_signal(
        ROA.BasicDailyFactorAlpha.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    # CFO (operating cash flow): 1 when positive.
    CFO = loadData(para='CFO')
    CFO_ = _binary_signal(
        CFO.BasicDailyFactorAlpha.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    # Change in ROA: 1 when positive.
    GROAQ = loadData(para='GROAQ')
    GROAQ_ = _binary_signal(
        GROAQ.BasicDailyFactorAlpha.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    # Accrual, computed here as CFO - ROA: 1 when negative.
    # NOTE(review): the classic F-score awards the point when CFO > ROA;
    # the sign convention below is kept as written - confirm it is intended.
    Accrual = CFO.BasicDailyFactorAlpha.loc[start:end, :] \
        - ROA.BasicDailyFactorAlpha.loc[start:end, :]
    Accrual_ = _binary_signal(
        Accrual.copy(), LimitStatus,
        lambda frame: frame < 0, lambda frame: frame >= 0)

    # Change in leverage: 1 when leverage did not increase.
    BLEV = loadData(para='BLEV')
    delta_BLEV = BLEV.BasicDailyFactorAlpha.diff()
    delta_BLEV_ = _binary_signal(
        delta_BLEV.loc[start:end, :], LimitStatus,
        lambda frame: frame <= 0, lambda frame: frame > 0)

    # Change in current ratio: 1 when it increased.
    CurrentRatio = loadData(para='CurrentRatio')
    delta_CurrentRatio = CurrentRatio.BasicDailyFactorAlpha.diff()
    delta_CurrentRatio_ = _binary_signal(
        delta_CurrentRatio.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    logreturn = pd.read_csv(para.data_path + 'logreturn.csv', index_col=0)
    logreturn = logreturn.loc[start:end, :]

    # EG_OFFER: whether equity was issued / rights offered in the past year.
    EG_OFFER = pd.read_csv(para.data_path + 'EG_OFFER.csv', index_col=0)
    column_num = len(logreturn.index)
    # Empty frame indexed by the logreturn column labels; merging against it
    # restricts, then pads, the rows of EG_OFFER to that label set.
    df_merge = pd.DataFrame(columns=logreturn.index, index=logreturn.columns)
    EG_OFFER_row = pd.merge(EG_OFFER, df_merge, how='inner', left_index=True, right_index=True)
    EG_OFFER = pd.merge(EG_OFFER_row, df_merge, how='outer', left_index=True, right_index=True)
    # Drop the placeholder columns again (trailing block, then all-NaN ones).
    EG_OFFER = EG_OFFER.iloc[:, :-column_num]
    EG_OFFER = EG_OFFER.dropna(how='all', axis=1)
    EG_OFFER.columns = tradingDateList[:-1]
    EG_OFFER = EG_OFFER.T

    # Change in gross profit margin: 1 when it increased.
    GrossProfitMargin = loadData(para='GrossProfitMargin')
    delta_GrossProfitMargin = GrossProfitMargin.BasicDailyFactorAlpha.diff()
    delta_GrossProfitMargin_ = _binary_signal(
        delta_GrossProfitMargin.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    # Change in asset turnover: 1 when it increased.
    AssetsTurn = loadData(para='AssetsTurn')
    delta_AssetsTurn = AssetsTurn.BasicDailyFactorAlpha.diff()
    delta_AssetsTurn_ = _binary_signal(
        delta_AssetsTurn.loc[start:end, :], LimitStatus,
        lambda frame: frame > 0, lambda frame: frame <= 0)

    return (ROA_, CFO_, GROAQ_, Accrual_,
            delta_BLEV_, delta_CurrentRatio_, EG_OFFER,
            delta_GrossProfitMargin_, delta_AssetsTurn_)
def _trailing_variance(frame, month_ends, window):
    """Column-wise variance of the ``window`` rows preceding each position.

    For every position ``i >= window`` in ``month_ends`` the rows
    ``frame.iloc[i - window:i]`` are taken (always from the full ``frame``),
    all-NaN columns are dropped, and ``.var()`` is computed.

    NOTE(review): ``i`` is a position in ``month_ends`` but is used as a row
    position in ``frame``; this assumes ``frame`` has one row per entry of
    ``month_ends`` - TODO confirm against the caller's data.

    Returns:
        DataFrame with one row per position, indexed by ``month_ends[window:]``.
    """
    variances = [
        frame.iloc[i - window:i, :].dropna(axis=1, how='all').var()
        for i, _ in enumerate(month_ends)
        if i >= window
    ]
    return pd.DataFrame(variances, index=month_ends[window:])


def _align_report_to_month_ends(report, logreturn, month_ends):
    """Restrict/pad a report table to the ``logreturn`` columns and relabel it.

    ``report`` is transposed, merged (inner, then outer) against an empty
    frame indexed by ``logreturn.columns``, stripped of the placeholder
    columns, given ``month_ends`` as column labels and transposed back.
    """
    placeholder_width = len(logreturn.index)
    placeholder = pd.DataFrame(columns=logreturn.index, index=logreturn.columns)
    matched = pd.merge(report.T, placeholder, how='inner', left_index=True, right_index=True)
    padded = pd.merge(matched, placeholder, how='outer', left_index=True, right_index=True)
    padded = padded.iloc[:, :-placeholder_width]
    padded = padded.dropna(how='all', axis=1)
    padded.columns = month_ends
    return padded.T


def _scale_by_market_value(frame, lookup_dates, total_mv, row_labels):
    """Divide row ``i`` of ``frame`` by ``total_mv.loc[lookup_dates[i]]``."""
    scaled = [
        frame.iloc[i, :] / total_mv.loc[date, :]
        for i, date in enumerate(lookup_dates)
    ]
    return pd.DataFrame(scaled, index=row_labels)


def data_initial_G(para):
    """Build the component inputs of the G-score.

    Args:
        para: configuration object; ``startDate``, ``endDate``, ``data_path``
            and ``backtestwindow`` are read.

    Returns:
        An 8-tuple, in this order: ``ROA_``, ``CFO_``, ``Accrual_``
        (CFO - ROA), ``ROA_VAR`` (trailing variance of ROA),
        ``Sales_G_TTM_VAR`` (trailing variance of 'OperatingRevenueQYOY'),
        ``RD_MV_``, ``Sales_MV_``, ``Expenditure_MV_`` (R&D, selling expense
        and capital expenditure, each divided by the 'TotalMV' factor).
    """
    start, end = para.startDate, para.endDate
    window = para.backtestwindow

    # Price-limit status: 1 = limit-up, -1 = limit-down, 0 = neither.
    # Raw strings: the backslashes are path separators, not escape sequences.
    UpDownLimitStatus = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareEODPrices\BasicDailyFactor_UpDownLimitStatus.h5')
    LimitStatus = UpDownLimitStatus.loc[start:end, :]

    # NOTE(review): Status, listDateNum, Industry and Size are loaded but
    # neither used below nor returned. They are kept because stock_dif's side
    # effects (if any) are not visible here - candidates for removal.
    # ST / ST* status: 1 = normal.
    StockTradeStatus = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareTradeStatus\BasicDailyFactor_StockTradeStatus.h5')
    Status = StockTradeStatus.loc[start:end, :]
    # Number of days since listing.
    StockListDateNum = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareDescription\BasicDailyFactor_StockListDateNum.h5')
    listDateNum = StockListDateNum.loc[start:end, :]
    # Industry classification.
    Data_AShareIndustryClass = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareIndustryClass\AShareIndustriesClassCITICSNew_FirstIndustries.h5')
    Industry = Data_AShareIndustryClass.loc[start:end, :]
    Industry = stock_dif(Industry, LimitStatus)
    Industry.index = Data_AShareIndustryClass.loc[start:end, :].index
    # Total A-share market value.
    StockTotalMV = pd.read_hdf(
        dataPathPrefix + r'\DataBase\Data_AShareEODDerivativeIndicator\BasicDailyFactor_StockTotalMV.h5')
    Size = StockTotalMV.loc[start:end, :]
    Size = stock_dif(Size, LimitStatus)
    Size.index = StockTotalMV.loc[start:end, :].index

    logreturn = pd.read_csv(para.data_path + 'logreturn.csv', index_col=0)
    logreturn = logreturn.loc[start:end, :]
    tradingDateList = getTradingDateFromJY(start, end, ifTrade=True, Period='M')

    # ---- Profitability factors -------------------------------------------
    # ROA.
    ROA = loadData(para='ROA')
    ROA_ = ROA.BasicDailyFactorAlpha.loc[start:end, :]
    ROA_ = stock_dif(ROA_, LimitStatus)
    ROA_.index = ROA.BasicDailyFactorAlpha.loc[start:end, :].index

    # CFO (operating cash flow).
    CFO = loadData(para='CFO')
    CFO_ = CFO.BasicDailyFactorAlpha.loc[start:end, :]
    CFO_ = stock_dif(CFO_, LimitStatus)
    CFO_.index = CFO.BasicDailyFactorAlpha.loc[start:end, :].index

    # Accrual, computed here as CFO - ROA.
    Accrual = CFO.BasicDailyFactorAlpha.loc[start:end, :] \
        - ROA.BasicDailyFactorAlpha.loc[start:end, :]
    Accrual_ = Accrual.copy()
    Accrual_ = stock_dif(Accrual_, LimitStatus)
    Accrual_.index = Accrual.loc[start:end, :].index

    # Variance of ROA. Every window is cut from the full ROA_ frame; slicing
    # the previous window again would shrink it on each iteration.
    ROA_VAR = _trailing_variance(ROA_, tradingDateList, window)

    # Variance of year-over-year operating revenue growth.
    # NOTE(review): unlike ROA, this factor is not restricted to
    # startDate..endDate before windowing - confirm that is intended.
    Sales_G_TTM = loadData(para='OperatingRevenueQYOY').BasicDailyFactorAlpha
    Sales_G_TTM = stock_dif(Sales_G_TTM, LimitStatus)
    Sales_G_TTM_VAR = _trailing_variance(Sales_G_TTM, tradingDateList, window)
    Sales_G_TTM_VAR = stock_dif(Sales_G_TTM_VAR, LimitStatus)
    Sales_G_TTM_VAR.index = tradingDateList[window:]

    # The three ratios below share one denominator, so load it once.
    # NOTE(review): the original comments call the denominator "total assets",
    # but the factor loaded is 'TotalMV'.
    Total_MV = loadData(para='TotalMV').BasicDailyFactorAlpha
    month_ends = tradingDateList[:-1]

    # R&D / TotalMV.
    # Per the original comment: R&D is filled from semi-annual and annual
    # reports - prior-year annual report from end of April, current
    # semi-annual report from end of September.
    R_and_D = pd.read_csv(para.data_path + 'RandD.csv', index_col=0)
    R_and_D_ = _align_report_to_month_ends(R_and_D, logreturn, month_ends)
    # NOTE(review): the lookup dates come from the raw CSV index here, whereas
    # the two ratios below use the relabelled frame's index - kept as written.
    RD_MV_ = _scale_by_market_value(R_and_D_, list(R_and_D.index), Total_MV, month_ends)

    # Selling expense / TotalMV.
    # Per the original comment: filled from quarterly reports, switching at
    # end of April (prior-year annual), end of August (semi-annual) and end
    # of October (third quarter).
    Sales = pd.read_csv(para.data_path + 'sales.csv', index_col=0)
    Sales_ = _align_report_to_month_ends(Sales, logreturn, month_ends)
    Sales_MV_ = _scale_by_market_value(Sales_, list(Sales_.index), Total_MV, month_ends)

    # Capital expenditure / TotalMV (same fill schedule as selling expense,
    # per the original comment). The numerator is the expenditure table
    # itself, not the selling-expense table.
    Expenditure = pd.read_csv(para.data_path + 'expenditure.csv', index_col=0)
    Expenditure_ = _align_report_to_month_ends(Expenditure, logreturn, month_ends)
    Expenditure_MV_ = _scale_by_market_value(
        Expenditure_, list(Expenditure_.index), Total_MV, month_ends)

    return (ROA_, CFO_, Accrual_,
            ROA_VAR, Sales_G_TTM_VAR,
            RD_MV_, Sales_MV_, Expenditure_MV_)
def every_month(self, para, synthesis_Method):
    """Combine the factors in ``para.factorlist`` into one composite per date.

    For every date in ``self.tradingDateList[:-1]`` a cross-section is built
    (return, limit status, trade status, days listed, industry, size and the
    min-max-scaled factor values), the rank IC of each factor against the
    return is computed on the filtered stock universe, and the unfiltered
    factor values are combined with weights chosen by ``synthesis_Method``.

    Args:
        para: configuration object. Attributes read: ``factorlist``,
            ``startDate``, ``endDate``, ``ret_calMethod`` ('对数' for log
            return, '简单' for simple return), ``listnum``, ``fin_stock``,
            ``backtestwindow`` (ICIR only) and ``shrink_weight``
            (half-life only).
        synthesis_Method: one of
            '等权重' (equal weights),
            'IC均值加权' (mean of all ICs so far),
            'ICIR加权' (mean IC / std of IC over the last
            ``para.backtestwindow`` dates),
            '最大化IR加权' (solution of ``cov @ w = mean IC`` with a
            Ledoit-Wolf covariance of the factor values),
            '半衰IC加权' (exponentially smoothed IC).
            Any other value produces no rows.

    Returns:
        DataFrame with one row per processed date (index) and one column per
        stock; an empty DataFrame when no row was produced.

    Raises:
        ValueError: if ``para.ret_calMethod`` is neither '对数' nor '简单'.

    Side effects:
        ``self.ret`` and ``self.dataFrame`` are overwritten on every date.
    """
    factor_names = para.factorlist
    n_factors = len(factor_names)
    dates = self.tradingDateList[:-1]

    # Load and scale every factor once instead of once per date, and keep only
    # the rows that the loop below looks up.
    scaled_factors = {}
    for factor_name in factor_names:
        raw = loadData(factor_name).BasicDailyFactorAlpha.loc[
            para.startDate:para.endDate, :]
        # NOTE(review): np.min/np.max on a DataFrame reduce per column in
        # older pandas and may reduce to a scalar in newer versions; the
        # expression is kept exactly as written.
        lowest = np.min(raw)
        highest = np.max(raw)
        scaled = (raw - lowest) / (highest - lowest)
        scaled_factors[factor_name] = scaled.loc[scaled.index.isin(dates)]

    composite_rows = []
    IC_list = []
    WIC = []  # running state of the half-life method

    for i, currentDate in enumerate(tqdm(dates)):
        # Previous date at the same frequency, used for the return.
        # NOTE(review): for the first date (i == 0) this index is -1, i.e.
        # the LAST trading date, so the first return spans the whole sample.
        # Kept as written because the right fallback is not evident here.
        lastDate = self.tradingDateList[i - 1]

        # Log return or simple return.
        if para.ret_calMethod == '对数':
            self.ret = np.log(self.Price.loc[currentDate, :] /
                              self.Price.loc[lastDate, :])
        elif para.ret_calMethod == '简单':
            self.ret = self.Price.loc[currentDate, :] / self.Price.loc[
                lastDate, :] - 1
        else:
            raise ValueError(
                'unsupported para.ret_calMethod: %r' % (para.ret_calMethod,))

        # Base cross-section; the index is the full stock list.
        self.dataFrame = pd.concat([
            self.ret,
            self.LimitStatus.loc[currentDate, :],
            self.Status.loc[currentDate, :],
            self.listDateNum.loc[currentDate, :],
            self.Industry.loc[currentDate, :],
            self.Size.loc[currentDate, :],
        ], axis=1, sort=True)
        self.dataFrame.columns = [
            'RET', 'LimitStatus', 'Status', 'listDateNum', 'Industry', 'Size'
        ]

        # Append the factor values of this date as the last columns.
        for factor_name in factor_names:
            self.dataFrame.loc[:, factor_name] = \
                scaled_factors[factor_name].loc[currentDate, :]

        # Keep the unfiltered cross-section: the composite is computed for
        # every stock, the IC only on the filtered universe.
        dataFrame_o = self.dataFrame.copy()

        self.dataFrame = self.dataFrame.dropna()
        # Not at a price limit.
        self.dataFrame = self.dataFrame.loc[self.dataFrame['LimitStatus'] == 0]
        # Normal trade status (not ST / ST* / delisted).
        self.dataFrame = self.dataFrame.loc[self.dataFrame['Status'] == 1]
        # Listed for at least para.listnum days.
        self.dataFrame = self.dataFrame.loc[
            self.dataFrame['listDateNum'] >= para.listnum]
        # Optionally exclude financial stocks (industry code 41).
        if para.fin_stock == 'no':
            self.dataFrame = self.dataFrame.loc[
                self.dataFrame['Industry'] != 41]

        # Spearman rank IC of every factor against the return.
        IC_matrix, _ = spearmanr(
            self.dataFrame.iloc[:, -n_factors:],
            self.dataFrame.loc[:, 'RET'])
        if np.ndim(IC_matrix) == 0:
            # With a single factor spearmanr returns a scalar, not a matrix.
            IC = np.array([IC_matrix])
        else:
            # Last column = correlations with RET; drop RET's own entry.
            IC = IC_matrix[:-1, -1]
        IC_list.append(IC)

        # Per-factor mean of all ICs observed so far.
        mean_IC = np.mean(IC_list, axis=0)

        if synthesis_Method == '等权重':
            weight = np.array([1 / n_factors] * n_factors)
        elif synthesis_Method == 'IC均值加权':
            weight = mean_IC
        elif synthesis_Method == 'ICIR加权':
            # IR = mean(IC) / std(IC); the std uses at most the last
            # para.backtestwindow observations.
            # NOTE(review): with a single observation the std is 0, so the
            # first date's weights are inf/NaN - kept as in the original.
            history = pd.DataFrame(IC_list)
            if len(IC_list) >= para.backtestwindow:
                history = history.iloc[-para.backtestwindow:, :]
            demoninator = np.std(history, axis=0)
            weight = np.array(mean_IC) / np.array(demoninator)
        elif synthesis_Method == '最大化IR加权':
            # IR = w . mean(IC) / sqrt(w' cov w); the first-order condition
            # gives w = cov^-1 . mean(IC). The covariance is the Ledoit-Wolf
            # shrinkage estimate over the filtered factor values:
            # https://scikit-learn.org/stable/modules/generated/sklearn.covariance.LedoitWolf.html
            cov = LedoitWolf().fit(self.dataFrame.iloc[:, -n_factors:])
            weight = np.linalg.solve(cov.covariance_, mean_IC)
        elif synthesis_Method == '半衰IC加权':
            # Exponential smoothing of the IC series.
            if len(WIC) == 0:
                WIC = IC
            else:
                WIC = para.shrink_weight * IC + (1 - para.shrink_weight) * WIC
            weight = WIC
        else:
            # Unknown method: nothing to combine for this date.
            continue

        combined = dataFrame_o.iloc[:, -n_factors:].dot(weight)
        composite_rows.append(
            pd.DataFrame(combined.values.reshape(1, -1),
                         columns=dataFrame_o.index,
                         index=[currentDate]))

    if not composite_rows:
        return pd.DataFrame()
    return pd.concat(composite_rows, sort=False)
elif para.ret_calMethod == '简单': ret = Price.loc[currentDate, :] / Price.loc[lastDate, :] - 1 dataFrame = pd.concat([ ret, LimitStatus.loc[currentDate, :], Status.loc[currentDate, :], listDateNum.loc[currentDate, :], Industry.loc[currentDate, :], Size.loc[currentDate, :] ], axis=1, sort=True) dataFrame.columns = [ 'RET', 'LimitStatus', 'Status', 'listDateNum', 'Industry', 'Size' ] for i, factor_name in enumerate(para.factorlist): factor_i = loadData(factor_name) factor_ii = factor_i.BasicDailyFactorAlpha.loc[ para.startDate:para.endDate, :] factor_ii = (factor_ii - np.min(factor_ii)) / (np.max(factor_ii) - np.min(factor_ii)) dataFrame.loc[:, factor_name] = factor_ii.loc[currentDate, :] dataFrame_o = dataFrame.copy() dataFrame = dataFrame.dropna() dataFrame = dataFrame.loc[dataFrame['LimitStatus'] == 0] # 提取非涨跌停的正常交易的数据 dataFrame = dataFrame.loc[dataFrame['Status'] == 1] # 提取非ST/ST*/退市的正常交易的数据 dataFrame = dataFrame.loc[dataFrame['listDateNum'] >= para.listnum] # 提取上市天数超过listnum的股票 if para.fin_stock == 'no': # 非银行金融代号41 dataFrame = dataFrame.loc[dataFrame['Industry'] != 41]