def __init__(self, para): # get trading date list as monthly frequancy self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') self.Price, self.LimitStatus, self.Status, self.listDateNum, self.Industry, self.Size = basic_data( para) # self.Factor = pd.read_excel(para.data_path + para.factor + '.xlsx',index_col = 0).T Factor = pd.read_csv(para.data_path + para.factor + '.csv', index_col=0) self.Factor = stock_dif(Factor, self.LimitStatus) # self.Factor = Factor.loc[para.startDate:para.endDate, :] # self.Factor.index = self.LimitStatus.index.copy() # Factor.index.astype('str') # Factor.index = pd.to_datetime(Factor.index, format='%Y%m%d') # self.Price = stock_dif(self.Price,self.Factor) # self.LimitStatus = stock_dif(self.LimitStatus, self.Factor) # self.Status = stock_dif(self.Status, self.Factor) # self.listDateNum = stock_dif(self.listDateNum, self.Factor) # self.Industry = stock_dif(self.Industry, self.Factor) # self.Size = stock_dif(self.Size, self.Factor) # self.Factor.index = self.LimitStatus.index pass
def __init__(self,para): # get trading date list as monthly frequancy self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') self.Price, self.LimitStatus, self.Status, self.listDateNum, self.Industry, self.Size = basic_data(para) self.Factor = pd.read_csv(para.result_path+para.factor+'.csv',index_col=0) pass
def __init__(self): self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') Factor = loadData(para = para.factor).BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] self.Factor2 = pd.read_csv(para.result_path+'_'+para.factor2+'.csv',index_col=0) self.Price, self.LimitStatus, self.Status, self.listDateNum, self.Industry, self.Size = basic_data(para) self.Factor = stock_dif(Factor, self.LimitStatus) self.Factor.index = self.LimitStatus.index.copy() # self.Factor2 = stock_dif(Factor2, self.LimitStatus) # self.Factor2.index = self.LimitStatus.index.copy() # _, self.Factor2 = Gmain().every_month() # self.Factor2.columns = self.LimitStatus.columns.copy() pass
def __init__(self, para): # get trading date list as monthly frequancy self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') DATA = loadData(para.factor) Factor = DATA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] self.Price, self.LimitStatus, self.Status, self.listDateNum, self.Industry, self.Size = basic_data( para) self.Factor = stock_dif(Factor, self.LimitStatus) pass
def __init__(self): self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') # get basid data from function basic_data self.Price, \ self.LimitStatus, \ self.Status, \ self.listDateNum, \ self.Industry, \ self.Size = basic_data(para) pass
def __init__(self,para): # get trading date list as monthly frequancy self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') self.Price, self.LimitStatus, self.Status, self.listDateNum, self.Industry, self.Size = basic_data(para) Factor = pd.read_csv(para.data_path + para.factor + '.csv', index_col=0) Factor = Factor.loc[para.startDate:para.endDate,:] self.Factor = stock_dif(Factor, self.LimitStatus) self.df = pd.read_csv(para.data_path + 'whole.csv', index_col=0) pass
def data_initial_F(para): tradingDateList = getTradingDateFromJY(20091231, para.endDate, ifTrade=True, Period='M') ################################# 涨跌停数据:1表示是涨停,-1表示跌停,0表示非涨跌停 UpDownLimitStatus = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareEODPrices\BasicDailyFactor_UpDownLimitStatus.h5') LimitStatus = UpDownLimitStatus.loc[para.startDate:para.endDate, :] ################################# 盈利类因子 #################################### 提取ROA因子 ROA = loadData(para='ROA') ROA_ = ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] ROA_ = stock_dif(ROA_, LimitStatus) ROA_[ROA_ > 0] = 1 ROA_[ROA_ <= 0] = 0 ROA_.index = ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :].index #################################### CFOA因子 CFO = loadData(para='CFO') CFO_ = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] CFO_ = stock_dif(CFO_, LimitStatus) CFO_[CFO_ > 0] = 1 CFO_[CFO_ <= 0] = 0 CFO_.index = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :].index #################################### delta_ROA因子 GROAQ = loadData(para='GROAQ') GROAQ_ = GROAQ.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] GROAQ_ = stock_dif(GROAQ_, LimitStatus) GROAQ_[GROAQ_ > 0] = 1 GROAQ_[GROAQ_ <= 0] = 0 GROAQ_.index = GROAQ.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :].index # #################################### Accrual 应计量 Accrual = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] \ - ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] Accrual_ = Accrual.copy() Accrual_ = stock_dif(Accrual_, LimitStatus) Accrual_[Accrual_ < 0] = 1 Accrual_[Accrual_ >= 0] = 0 Accrual_.index = Accrual.loc[para.startDate:para.endDate, :].index # #################################### delta_leverage因子 BLEV = loadData(para='BLEV') delta_BLEV = BLEV.BasicDailyFactorAlpha.diff() delta_BLEV_ = delta_BLEV.loc[para.startDate:para.endDate, :] delta_BLEV_ = stock_dif(delta_BLEV_, LimitStatus) delta_BLEV_[delta_BLEV_ <= 0] = 1 delta_BLEV_[delta_BLEV_ > 0] = 0 delta_BLEV_.index = delta_BLEV.loc[para.startDate:para.endDate, :].index # ################################### delta_CurrentRatio因子 CurrentRatio = loadData(para='CurrentRatio') delta_CurrentRatio = CurrentRatio.BasicDailyFactorAlpha.diff() delta_CurrentRatio_ = delta_CurrentRatio.loc[para.startDate:para.endDate, :] delta_CurrentRatio_ = stock_dif(delta_CurrentRatio_, LimitStatus) delta_CurrentRatio_[delta_CurrentRatio_ <= 0] = 0 delta_CurrentRatio_[delta_CurrentRatio_ > 0] = 1 delta_CurrentRatio_.index = delta_CurrentRatio.loc[para.startDate:para.endDate, :].index logreturn = pd.read_csv(para.data_path + 'logreturn.csv', index_col=0) logreturn = logreturn.loc[para.startDate:para.endDate, :] # ################################### EG_OFFER 过去一年是否增发或配股 EG_OFFER = pd.read_csv(para.data_path + 'EG_OFFER.csv', index_col=0) column_num = len(logreturn.index) df_merge = pd.DataFrame(columns=logreturn.index, index=logreturn.columns) EG_OFFER_row = pd.merge(EG_OFFER, df_merge, how='inner', left_index=True, right_index=True) EG_OFFER = pd.merge(EG_OFFER_row, df_merge, how='outer', left_index=True, right_index=True) EG_OFFER = EG_OFFER.iloc[:, :-column_num] EG_OFFER = EG_OFFER.dropna(how='all', axis=1) EG_OFFER.columns = tradingDateList[:-1] EG_OFFER = EG_OFFER.T # ################################### delta_GrossProfitMargin GrossProfitMargin = loadData(para='GrossProfitMargin') delta_GrossProfitMargin = GrossProfitMargin.BasicDailyFactorAlpha.diff() delta_GrossProfitMargin_ = delta_GrossProfitMargin.loc[para.startDate:para.endDate, :] delta_GrossProfitMargin_ = stock_dif(delta_GrossProfitMargin_, LimitStatus) delta_GrossProfitMargin_[delta_GrossProfitMargin_ <= 0] = 0 delta_GrossProfitMargin_[delta_GrossProfitMargin_ > 0] = 1 delta_GrossProfitMargin_.index = delta_GrossProfitMargin.loc[para.startDate:para.endDate, :].index # ################################### delta_AssetsTurn AssetsTurn = loadData(para='AssetsTurn') delta_AssetsTurn = AssetsTurn.BasicDailyFactorAlpha.diff() delta_AssetsTurn_ = delta_AssetsTurn.loc[para.startDate:para.endDate, :] delta_AssetsTurn_ = stock_dif(delta_AssetsTurn_, LimitStatus) delta_AssetsTurn_[delta_AssetsTurn_ <= 0] = 0 delta_AssetsTurn_[delta_AssetsTurn_ > 0] = 1 delta_AssetsTurn_.index = delta_AssetsTurn.loc[para.startDate:para.endDate, :].index return ROA_,CFO_,GROAQ_,Accrual_,\ delta_BLEV_,delta_CurrentRatio_,EG_OFFER,\ delta_GrossProfitMargin_,delta_AssetsTurn_
def data_initial_G(para): ################################# 涨跌停数据:1表示是涨停,-1表示跌停,0表示非涨跌停 UpDownLimitStatus = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareEODPrices\BasicDailyFactor_UpDownLimitStatus.h5') LimitStatus = UpDownLimitStatus.loc[para.startDate:para.endDate, :] ################################# ST/ST*数据: 1表示正常 StockTradeStatus = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareTradeStatus\BasicDailyFactor_StockTradeStatus.h5') Status = StockTradeStatus.loc[para.startDate:para.endDate, :] ############################### 过去交易的天数 StockListDateNum = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareDescription\BasicDailyFactor_StockListDateNum.h5') listDateNum = StockListDateNum.loc[para.startDate:para.endDate, :] ################################# 行业分类数据 Data_AShareIndustryClass = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareIndustryClass\AShareIndustriesClassCITICSNew_FirstIndustries.h5') Industry = Data_AShareIndustryClass.loc[para.startDate:para.endDate, :] Industry = stock_dif(Industry, LimitStatus) Industry.index = Data_AShareIndustryClass.loc[para.startDate:para.endDate, :].index ################################# A股总市值 StockTotalMV = pd.read_hdf( dataPathPrefix + '\DataBase\Data_AShareEODDerivativeIndicator\BasicDailyFactor_StockTotalMV.h5') Size = StockTotalMV.loc[para.startDate:para.endDate, :] Size = stock_dif(Size, LimitStatus) Size.index = StockTotalMV.loc[para.startDate:para.endDate, :].index logreturn = pd.read_csv(para.data_path + 'logreturn.csv', index_col=0) logreturn = logreturn.loc[para.startDate:para.endDate, :] tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') ################################# 盈利类因子 #################################### 提取ROA因子 ROA = loadData(para='ROA') ROA_ = ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] ROA_ = stock_dif(ROA_, LimitStatus) ROA_.index = ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :].index #################################### CFOA因子 CFO = loadData(para='CFO') CFO_ = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] CFO_ = stock_dif(CFO_, LimitStatus) CFO_.index = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :].index # #################################### Accrual 应计量 Accrual = CFO.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] \ - ROA.BasicDailyFactorAlpha.loc[para.startDate:para.endDate, :] Accrual_ = Accrual.copy() Accrual_ = stock_dif(Accrual_, LimitStatus) Accrual_.index = Accrual.loc[para.startDate:para.endDate, :].index # #################################### ROA的方差 ROA_row = ROA_ ROA_VAR_list = [] for i, Date in enumerate(tradingDateList): if i < para.backtestwindow: continue ROA_row = ROA_row.iloc[i - para.backtestwindow:i, :].dropna(axis=1, how='all') ROA_VAR = ROA_row.var() ROA_VAR_list.append(ROA_VAR) ROA_VAR = pd.DataFrame(ROA_VAR_list, index=tradingDateList[para.backtestwindow:]) ###################################### 营业收入同比增量率的方差 Sales_G_TTM = loadData(para='OperatingRevenueQYOY').BasicDailyFactorAlpha Sales_G_TTM = stock_dif(Sales_G_TTM, LimitStatus) Sales_G_TTM_list = [] for i, Date in enumerate(tradingDateList): if i < para.backtestwindow: continue Sales_G_TTM_row = Sales_G_TTM.iloc[i - para.backtestwindow:i, :].dropna(axis=1, how='all') Sales_G_TTM_VAR = Sales_G_TTM_row.var() Sales_G_TTM_list.append(Sales_G_TTM_VAR) Sales_G_TTM_VAR = pd.DataFrame(Sales_G_TTM_list, index=tradingDateList[para.backtestwindow:]) Sales_G_TTM_VAR = stock_dif(Sales_G_TTM_VAR, LimitStatus) Sales_G_TTM_VAR.index = tradingDateList[para.backtestwindow:] ##################################### R&D /总资产 # R&D数据是用中报和年报的数据填充,4月底开始填充去年年报,9月底开始填充当前中报 R_and_D = pd.read_csv(para.data_path + 'RandD.csv', index_col=0) column_num = len(logreturn.index) df_merge = pd.DataFrame(columns=logreturn.index, index=logreturn.columns) R_and_D_row = pd.merge(R_and_D.T, df_merge, how='inner', left_index=True, right_index=True) R_and_D_row_ = pd.merge(R_and_D_row, df_merge, how='outer', left_index=True, right_index=True) R_and_D_row_ = R_and_D_row_.iloc[:, :-column_num] R_and_D_row_ = R_and_D_row_.dropna(how='all', axis=1) R_and_D_row_.columns = tradingDateList[:-1] R_and_D_ = R_and_D_row_.T Total_MV = loadData(para='TotalMV').BasicDailyFactorAlpha RD_MV_list = [] for i, Date in enumerate(list(R_and_D.index)): RD_MV = R_and_D_.iloc[i, :] / Total_MV.loc[Date, :] RD_MV_list.append(RD_MV) RD_MV_ = pd.DataFrame(RD_MV_list, index=tradingDateList[:-1]) ###################################### 销售费用 /总资产 # 销售费用用季报数据填充,分段时间点为4月底(去年年报),8月底用今年中报,10月底用今年三季报 Sales = pd.read_csv(para.data_path + 'sales.csv', index_col=0) column_num = len(logreturn.index) df_merge = pd.DataFrame(columns=logreturn.index, index=logreturn.columns) Sales_row = pd.merge(Sales.T, df_merge, how='inner', left_index=True, right_index=True) Sales_row_ = pd.merge(Sales_row, df_merge, how='outer', left_index=True, right_index=True) Sales_row_ = Sales_row_.iloc[:, :-column_num] Sales_row_ = Sales_row_.dropna(how='all', axis=1) Sales_row_.columns = tradingDateList[:-1] Sales_ = Sales_row_.T Total_MV = loadData(para='TotalMV').BasicDailyFactorAlpha Sales_MV_list = [] for i, Date in enumerate(list(Sales_.index)): Sales__MV = Sales_.iloc[i, :] / Total_MV.loc[Date, :] Sales_MV_list.append(Sales__MV) Sales_MV_ = pd.DataFrame(Sales_MV_list, index=tradingDateList[:-1]) ###################################### 资本性支出 /总资产 # 资本性支出用季报数据填充,分段时间点为4月底(去年年报),8月底用今年中报,10月底用今年三季报 Expenditure = pd.read_csv(para.data_path + 'expenditure.csv', index_col=0) column_num = len(logreturn.index) df_merge = pd.DataFrame(columns=logreturn.index, index=logreturn.columns) Expenditure_row = pd.merge(Expenditure.T, df_merge, how='inner', left_index=True, right_index=True) Expenditure_row_ = pd.merge(Expenditure_row, df_merge, how='outer', left_index=True, right_index=True) Expenditure_row_ = Expenditure_row_.iloc[:, :-column_num] Expenditure_row_ = Expenditure_row_.dropna(how='all', axis=1) Expenditure_row_.columns = tradingDateList[:-1] Expenditure_ = Expenditure_row_.T Total_MV = loadData(para='TotalMV').BasicDailyFactorAlpha Expenditure_MV_list = [] for i, Date in enumerate(list(Expenditure_.index)): Expenditure_MV = Sales_.iloc[i, :] / Total_MV.loc[Date, :] Expenditure_MV_list.append(Expenditure_MV) Expenditure_MV_ = pd.DataFrame(Expenditure_MV_list, index=tradingDateList[:-1]) return ROA_,CFO_,Accrual_,\ ROA_VAR,Sales_G_TTM_VAR,\ RD_MV_,Sales_MV_,Expenditure_MV_
import pandas as pd from tqdm import tqdm import warnings warnings.filterwarnings('ignore') from getTradingDate import getTradingDateFromJY class Para(): startDate = 20120131 endDate = 20200508 lookBackDay = 60 dataPathPrefix = 'D:\caitong_security' pass para = Para() # get time series tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='D') # get original data StockResearch = pd.read_hdf(para.dataPathPrefix +'\Data(1)\Data_StockInvestorRelation\Stock_InvestorRelationship.h5') # get stock codes all_stock_list = StockResearch.SecuCode.unique() # In[] stock_research_number = [] for k, stock_k in enumerate(tqdm(all_stock_list)): tmpDf = StockResearch.loc[StockResearch['SecuCode'] == stock_k,:] reseach_list = [] for i, curentDate in enumerate(tradingDateList): lastDate = tradingDateList[tradingDateList.index(curentDate) - para.lookBackDay] tmpDf2 = tmpDf[(tmpDf.InfoPublDate >= lastDate) & \ (tmpDf.InfoPublDate <= curentDate)] research = len(tmpDf2)
def __init__(self): self.tradingDateList = getTradingDateFromJY(para.startDate, para.endDate, ifTrade=True, Period='M') pass