def _rows_like_ix(frame, rows):
    """Select rows the way the removed ``.ix`` indexer did for integer keys.

    ``.ix`` looked integer keys up by label when the index itself held
    integers and fell back to positional access otherwise.  pandas 1.0
    removed ``.ix``, so that choice is made explicitly here.

    Note: unlike ``.ix``, ``.loc`` raises ``KeyError`` when a requested
    label is absent instead of producing a NaN row.
    """
    rows = list(rows)
    if frame.index.inferred_type == "integer":
        return frame.loc[rows]
    return frame.iloc[rows]


def Get_total_loading(ret, tech_data, rm, macro_data, finance_loading, dummy):
    """Build the combined factor-loading table.

    Technical, market and macro loadings are computed over rows 0..191 of
    the inputs, standardised column-wise with ``scale`` and then stripped of
    their last three rows.  Financial loadings are stripped of their last
    three rows first and standardised afterwards.  Industry dummies are not
    standardised.  All five pieces are concatenated side by side.

    Args:
        ret: returns table; rows 0..191 are passed to the regressions.
        tech_data: technical-indicator data handed to
            ``tech.fb_reg_over_time``.
        rm: market-return data; rows 0..191 are passed to
            ``widgets.rm_reg_ri``.
        macro_data: macro-indicator data; rows 0..191 are passed to
            ``macro.macro_reg_ret``.
        finance_loading: DataFrame of financial factor loadings.
        dummy: DataFrame of industry dummy variables.

    Returns:
        pandas.DataFrame with the technical, market, macro, financial and
        industry columns, in that order.
    """
    # NOTE(review): 192 and the trailing three rows dropped everywhere are
    # hard-coded; their meaning is not visible in this function.
    interval = np.arange(192)

    def _standardize(raw_loading):
        # Column-wise standardisation, then drop the last three rows.
        return pd.DataFrame(scale(raw_loading, axis=0)[:-3, :])

    # Technical indicators (the last column of the regression output is
    # discarded).
    tech_loading = tech.fb_reg_over_time(
        _rows_like_ix(ret, interval), tech_data, interval
    ).iloc[:, :-1]
    tech_temp = _standardize(tech_loading)

    # Market indicators.
    mkt_loading = widgets.rm_reg_ri(
        _rows_like_ix(rm, interval), _rows_like_ix(ret, interval)
    )
    mkt_temp = _standardize(mkt_loading)

    # Macro indicators.
    macro_loading = macro.macro_reg_ret(
        _rows_like_ix(ret, interval), _rows_like_ix(macro_data, interval)
    )
    macro_temp = _standardize(macro_loading)

    # Financial factors: trimmed BEFORE scaling, unlike the groups above.
    finance_loading_temp = finance_loading.iloc[:-3, :]
    finance_temp = pd.DataFrame(scale(finance_loading_temp, axis=0))

    # Industry dummies: not standardised.  Every column except the
    # second-to-last is kept.
    ind_loading = dummy
    ind_temp = pd.concat(
        [ind_loading.iloc[:-3, :-2], ind_loading.iloc[:-3, -1]], axis=1
    )

    # NOTE(review): concat(axis=1) aligns on the row index.  The scaled
    # frames carry a fresh 0..n-1 index while ind_temp keeps dummy's index;
    # confirm that dummy is indexed 0..n-1 as well.
    total_loading = pd.concat(
        [tech_temp, mkt_temp, macro_temp, finance_temp, ind_temp], axis=1
    )
    # NOTE(review): the original trailing comment says two generated series
    # were discarded because they were mostly zeros; which two is not
    # evident from this function.
    return total_loading
def _rows_like_ix(frame, rows):
    """Select rows the way the removed ``.ix`` indexer did for integer keys.

    ``.ix`` looked integer keys up by label when the index itself held
    integers and fell back to positional access otherwise.  pandas 1.0
    removed ``.ix``, so that choice is made explicitly here.

    Note: unlike ``.ix``, ``.loc`` raises ``KeyError`` when a requested
    label is absent instead of producing a NaN row.
    """
    rows = list(rows)
    if frame.index.inferred_type == "integer":
        return frame.loc[rows]
    return frame.iloc[rows]


def Get_total_loading(ret, tech_data, rm, macro_data, finance_loading, dummy):
    """Build the combined factor-loading table.

    Technical, market and macro loadings are computed over rows 0..191 of
    the inputs, standardised column-wise with ``scale`` and then stripped of
    their last three rows.  Financial loadings are stripped of their last
    three rows first and standardised afterwards.  Industry dummies are not
    standardised.  All five pieces are concatenated side by side.

    Args:
        ret: returns table; rows 0..191 are passed to the regressions.
        tech_data: technical-indicator data handed to
            ``tech.fb_reg_over_time``.
        rm: market-return data; rows 0..191 are passed to
            ``widgets.rm_reg_ri``.
        macro_data: macro-indicator data; rows 0..191 are passed to
            ``macro.macro_reg_ret``.
        finance_loading: DataFrame of financial factor loadings.
        dummy: DataFrame of industry dummy variables.

    Returns:
        pandas.DataFrame with the technical, market, macro, financial and
        industry columns, in that order.
    """
    # NOTE(review): 192 and the trailing three rows dropped everywhere are
    # hard-coded; their meaning is not visible in this function.
    interval = np.arange(192)

    def _standardize(raw_loading):
        # Column-wise standardisation, then drop the last three rows.
        return pd.DataFrame(scale(raw_loading, axis=0)[:-3, :])

    # Technical indicators (the last column of the regression output is
    # discarded).
    tech_loading = tech.fb_reg_over_time(
        _rows_like_ix(ret, interval), tech_data, interval
    ).iloc[:, :-1]
    tech_temp = _standardize(tech_loading)

    # Market indicators.
    mkt_loading = widgets.rm_reg_ri(
        _rows_like_ix(rm, interval), _rows_like_ix(ret, interval)
    )
    mkt_temp = _standardize(mkt_loading)

    # Macro indicators.
    macro_loading = macro.macro_reg_ret(
        _rows_like_ix(ret, interval), _rows_like_ix(macro_data, interval)
    )
    macro_temp = _standardize(macro_loading)

    # Financial factors: trimmed BEFORE scaling, unlike the groups above.
    finance_loading_temp = finance_loading.iloc[:-3, :]
    finance_temp = pd.DataFrame(scale(finance_loading_temp, axis=0))

    # Industry dummies: not standardised.  Every column except the
    # second-to-last is kept.
    ind_loading = dummy
    ind_temp = pd.concat(
        [ind_loading.iloc[:-3, :-2], ind_loading.iloc[:-3, -1]], axis=1
    )

    # Stitch the pieces together to get the total loading.
    # NOTE(review): concat(axis=1) aligns on the row index.  The scaled
    # frames carry a fresh 0..n-1 index while ind_temp keeps dummy's index;
    # confirm that dummy is indexed 0..n-1 as well.
    total_loading = pd.concat(
        [tech_temp, mkt_temp, macro_temp, finance_temp, ind_temp], axis=1
    )
    # NOTE(review): the original trailing comment says two generated series
    # were discarded because they were mostly zeros; which two is not
    # evident from this function.
    return total_loading
'E:\\QuantProject2\\temp_data\\all_stocks_dummy.xlsx') dummy[0] = dummy[0].apply(lambda x: x[:-3]) dummy = dummy.set_index([0]) dummy = dummy.ix[stkcd['stkcd'].values] dummy = pd.DataFrame(dummy.values) # 代码为000009的股票没有行业信息,将其dummy令为0 dummy.fillna(0, inplace=True) # 用全部时间的数据来计算一个loading,然后用这个loading去回归收益率,通过显著月数来判断决定保留哪些技术指标 all_tech_loading = tech.fb_reg_over_all_time(ret, tech_data) significant_days_tech = tech.ret_reg_loading(all_tech_loading, ret, dummy) print("Significant_days_tech:") print(significant_days_tech) interval = np.arange(192) significant_days_mkt = tech.ret_reg_loading( widgets.rm_reg_ri(rm.ix[interval], ret.ix[interval]), ret, dummy) print("Significant_days_market:") print(significant_days_mkt) significant_days_macro = tech.ret_reg_loading( macro.macro_reg_ret(ret.ix[interval], macro_data.ix[interval]), ret, dummy) print("Significant_days_macro:") print(significant_days_macro) # 根据上面注释掉的那段程序的结果,删掉了EMA,trade这两个技术指标,以及最后两个宏观指标 tech_data.pop('EMA') tech_data.pop('trade') # 计算loading loading = dict() for i in range(73): interval = range(i, i + 119) #技术指标(删去了最后一个市值自变量,由于下面要用它来做WLS)
def _rows_like_ix(frame, rows):
    """Select rows the way the removed ``.ix`` indexer did for integer keys.

    ``.ix`` looked integer keys up by label when the index itself held
    integers and fell back to positional access otherwise.  pandas 1.0
    removed ``.ix``, so that choice is made explicitly here.

    Note: unlike ``.ix``, ``.loc`` raises ``KeyError`` when a requested
    label is absent instead of producing a NaN row.
    """
    rows = list(rows)
    if frame.index.inferred_type == "integer":
        return frame.loc[rows]
    return frame.iloc[rows]


# Read the industry dummy matrix and keep only the stocks listed in stkcd.
dummy = pd.read_excel('E:\\QuantProject2\\temp_data\\all_stocks_dummy.xlsx')
# Column 0 holds the stock code; its last three characters are stripped
# (presumably an exchange suffix -- TODO confirm).
dummy[0] = dummy[0].apply(lambda x: x[:-3])
dummy = dummy.set_index([0])
# reindex replaces the removed .ix label lookup: a code that is missing from
# the sheet becomes an all-NaN row instead of raising.
dummy = dummy.reindex(stkcd['stkcd'].values)
dummy = pd.DataFrame(dummy.values)
# Stock 000009 has no industry information; set its dummies to 0.
dummy.fillna(0, inplace=True)

# Compute one loading from the full sample, regress returns on it, and use
# the number of significant months to decide which indicators to keep.
all_tech_loading = tech.fb_reg_over_all_time(ret, tech_data)
significant_days_tech = tech.ret_reg_loading(all_tech_loading, ret, dummy)
print("Significant_days_tech:")
print(significant_days_tech)

interval = np.arange(192)
mkt_loading_all = widgets.rm_reg_ri(
    _rows_like_ix(rm, interval), _rows_like_ix(ret, interval)
)
significant_days_mkt = tech.ret_reg_loading(mkt_loading_all, ret, dummy)
print("Significant_days_market:")
print(significant_days_mkt)

macro_loading_all = macro.macro_reg_ret(
    _rows_like_ix(ret, interval), _rows_like_ix(macro_data, interval)
)
significant_days_macro = tech.ret_reg_loading(macro_loading_all, ret, dummy)
print("Significant_days_macro:")
print(significant_days_macro)

# Based on the screening results above, the EMA and trade technical
# indicators are removed.
# NOTE(review): the original comment also mentions dropping the last two
# macro indicators; that removal is not visible in this part of the script.
tech_data.pop('EMA')
tech_data.pop('trade')

# Compute the loadings over 73 rolling windows of 119 rows each.
loading = {}
for i in range(73):
    interval = range(i, i + 119)
    # Technical indicators (the last regressor, market value, is left out
    # because it is used below for WLS).
def _rows_like_ix(frame, rows):
    """Select rows the way the removed ``.ix`` indexer did for integer keys.

    ``.ix`` looked integer keys up by label when the index itself held
    integers and fell back to positional access otherwise.  pandas 1.0
    removed ``.ix``, so that choice is made explicitly here.

    Note: unlike ``.ix``, ``.loc`` raises ``KeyError`` when a requested
    label is absent instead of producing a NaN row.
    """
    rows = list(rows)
    if frame.index.inferred_type == "integer":
        return frame.loc[rows]
    return frame.iloc[rows]


# Read the industry dummy matrix and keep only the stocks listed in stkcd.
dummy = pd.read_excel('E:\\QuantProject2\\temp_data\\all_stocks_dummy.xlsx')
# Column 0 holds the stock code; its last three characters are stripped
# (presumably an exchange suffix -- TODO confirm).
dummy[0] = dummy[0].apply(lambda x: x[:-3])
dummy = dummy.set_index([0])
# reindex replaces the removed .ix label lookup: a code that is missing from
# the sheet becomes an all-NaN row instead of raising.
dummy = dummy.reindex(stkcd['stkcd'].values)
dummy = pd.DataFrame(dummy.values)
# Stock 000009 has no industry information; set its dummies to 0.
dummy.fillna(0, inplace=True)

# Compute one loading from the full sample, regress returns on it, and use
# the number of significant months to decide which indicators to keep.
all_tech_loading = tech.fb_reg_over_all_time(ret, tech_data)
significant_days_tech = tech.ret_reg_loading(all_tech_loading, ret, dummy)
print("Significant_days_tech:")
print(significant_days_tech)

interval = np.arange(192)
mkt_loading_all = widgets.rm_reg_ri(
    _rows_like_ix(rm, interval), _rows_like_ix(ret, interval)
)
significant_days_mkt = tech.ret_reg_loading(mkt_loading_all, ret, dummy)
print("Significant_days_market:")
print(significant_days_mkt)

macro_loading_all = macro.macro_reg_ret(
    _rows_like_ix(ret, interval), _rows_like_ix(macro_data, interval)
)
significant_days_macro = tech.ret_reg_loading(macro_loading_all, ret, dummy)
print("Significant_days_macro:")
print(significant_days_macro)

# Based on the screening results above, the EMA and trade technical
# indicators are removed.
# NOTE(review): the original comment also mentions dropping the last two
# macro indicators; that removal is not visible in this part of the script.
tech_data.pop('EMA')
tech_data.pop('trade')

# Compute the loadings over 73 rolling windows of 119 rows each.
loading = {}
for i in range(73):
    interval = range(i, i + 119)
    # Technical indicators: the last column (the market-value regressor) is
    # left out because it is used below for WLS; the last three rows are
    # dropped as well.
    tech_loading = tech.fb_reg_over_time(
        _rows_like_ix(ret, interval), tech_data, interval
    ).iloc[:-3, :-1]
    # Disabled in the original:
    # tech_loading = tech_loading.drop([5], axis=0)