def easy_compose_optimization(self, stock_pool, bench):
    """Solve a simple portfolio-weight linear program for the stock pool.

    Fetches the factor matrix of each risk factor, reshapes them into
    per-date cross sections, bundles them with the benchmark index
    weights and the estimated stock returns, and delegates the actual
    optimization to ``linear_programming``.

    :param stock_pool: DataFrame of estimated stock returns, indexed by
        stock code.
    :param bench: benchmark identifier (not used inside this method).
    :return: per-period stock weights produced by ``linear_programming``.
    """
    # Risk factors to constrain in the optimizer.  Each factor matrix is
    # fetched individually and stored in a dict keyed by factor name.
    risk_factors = ['size']  # self.factor_dict['risk_factor']
    codes = list(stock_pool.index)
    risk_fac_data = {}
    for fac in risk_factors:
        risk_fac_data[fac] = get_factor([fac], codes)[fac]

    # Re-shape the per-factor matrices into per-date cross sections:
    # each key is a date, each value a (codes x factors) DataFrame.
    limit_fac_data = concat_factors_panel(risk_factors, risk_fac_data,
                                          codes, ind=True, mktcap=False)

    data_dict = {
        'limit_fac_data': limit_fac_data,
        'index_wt': self.index_wei,
        'est_stock_rets': stock_pool,
    }
    return linear_programming(data_dict)
def layer_division_bt(factors, path_d, layer_num,
                      start_date='2009-02-27', end_date='2019-07-31',
                      if_concise=True):
    """Run the layer-division backtest for each factor and plot results.

    For every factor name the corresponding factor matrix CSV is loaded,
    a ``SingleFactorLayerDivisionBacktest`` is run, and three plots are
    produced (layer net values, yearly returns per layer, group-1 minus
    group-N spread).

    :param factors: iterable of factor names; ``'/'`` in a name maps to
        ``'_div_'`` in its CSV filename.
    :param path_d: dict of paths; uses ``'sf_test_save_path'`` and
        ``'factor_matrix_path'``.
    :param layer_num: number of layers for the backtest.
    :param start_date: backtest start (default kept from the original).
    :param end_date: backtest end (default kept from the original).
    :param if_concise: run the simplified monthly-frequency backtest.
    """
    sf_test_save_path = path_d['sf_test_save_path']
    factor_matrix_path = path_d['factor_matrix_path']

    # Output directory for the layer-backtest charts.
    # BUG FIX: os.mkdir fails when intermediate directories are missing
    # and is race-prone; makedirs(exist_ok=True) handles both cases.
    save_path_tmp = os.path.join(sf_test_save_path, '分层回测')
    os.makedirs(save_path_tmp, exist_ok=True)

    # Next-month percentage change, shared by all factor backtests.
    pct_chg_nm = get_factor(['PCT_CHG_NM'],
                            basic_path=factor_matrix_path)['PCT_CHG_NM']

    for fname in factors:
        openname = fname.replace('/', '_div_')
        facdat = pd.read_csv(os.path.join(factor_matrix_path,
                                          openname + '.csv'),
                             encoding='gbk', engine='python',
                             index_col=[0])
        facdat.columns = pd.to_datetime(facdat.columns)

        s = SingleFactorLayerDivisionBacktest(factor_name=fname,
                                              factor_data=facdat,
                                              num_layers=layer_num,
                                              if_concise=if_concise,
                                              start_date=start_date,
                                              end_date=end_date,
                                              pct_chg_nm=pct_chg_nm)
        records = s.run_layer_division_bt(equal_weight=True)

        if not records.empty:
            plot_layerdivision(path_d, records, fname, if_concise)    # layer net-value chart
            bar_plot_yearly(path_d, records, fname, if_concise)       # yearly per-layer bar chart
            plot_group_diff_plot(path_d, records, fname, if_concise)  # group-1 minus group-5 chart

    print(f"分层回测结束!结果见目录:{sf_test_save_path}")
    print('*' * 80)
def compute_factor_return(self):
    """Estimate historical factor returns from the composed factor panels.

    Steps:
      1. Completeness check — verify every required factor column exists
         in the most recent cross-sectional panel.
      2. Copy the factor matrices to the working directory and load them
         (single ``get_factor`` call for the whole factor list).
      3. Reshape into per-date cross sections, drop dates whose panel is
         missing any factor, then run ``history_factor_return``.

    :return: ``(factor_ret_history, sig)`` from ``history_factor_return``.
        The trailing NaN row is deliberately kept because prediction
        later shifts the series forward by one period.
    """
    factors_dict = self.factor_dict
    path_dict = self.path_dict

    # ---------------------------------- completeness check
    f_list = [v for values in factors_dict.values() for v in values]
    f_list = [col.replace('_div_', '/') for col in f_list]
    tmp = os.listdir(path_dict['factor_panel_path'])
    df_tmp = pd.read_csv(os.path.join(path_dict['factor_panel_path'],
                                      tmp[-1]),
                         engine='python', encoding='gbk', index_col=[0])
    cols = [col for col in df_tmp.columns]
    if not set(f_list).issubset(cols):
        print('factor 不够, 缺失的因子为:')
        print(set(f_list) - set(cols))
    else:
        print('通过因子完备性测试')

    # ---------------------------------- factor composition
    # NOTE(review): the my_factor_concat composition step is currently
    # disabled; pre-composed matrices are copied from old_matrix_path.
    params = {
        'factors': [key for key in factors_dict.keys()],
        'window': 6,
        'half_life': None,
    }
    copy_matrix(path_dict['old_matrix_path'],
                os.path.join(path_dict['save_path'], '新合成因子', '因子矩阵'))

    # Estimate expected returns from the copied matrices.
    path_dict.update({
        'matrix_path': os.path.join(path_dict['save_path'],
                                    '新合成因子', '因子矩阵')
    })
    matrix_path = path_dict['matrix_path']
    factors, window, half_life = (params['factors'], params['window'],
                                  params['half_life'])
    factors.extend(['Pct_chg_nm'])

    # All factor matrices as a dict keyed by factor name.
    # BUG FIX: the original called get_factor(factors, ...) once *per*
    # factor inside a dict comprehension, re-loading the full factor set
    # each iteration (O(n^2) file reads).  Load it exactly once.
    all_factors = get_factor(factors, basic_path=matrix_path)
    factors_dict = {fac: all_factors[fac] for fac in factors}

    # Reshape alpha factors into per-date cross sections.
    factors_panel = concat_factors_panel(factors=None,
                                         factors_dict=factors_dict,
                                         codes=None, ind=False,
                                         mktcap=False, perchg_nm=False)

    # Drop early cross sections that do not yet contain every factor.
    to_del = []
    for key, values in factors_panel.items():
        print(key)
        for f in factors:
            if f not in values.columns:
                print(f)
                to_del.append(key)
                break
    for d in to_del:
        factors_panel.pop(d)
    self.factors_panel = factors_panel

    # Keep the trailing NaN row: prediction shifts by one period later.
    factor_ret_history, sig = history_factor_return(factors,
                                                    factors_panel,
                                                    window, half_life)
    return factor_ret_history, sig
def get_equal_wei_with_index_ret(est_stock_rets, bestN_stock=None,
                                 est_indus_rets=None, bestN_indux=None):
    """Build equal-weight stock weights, optionally within top industries.

    If ``est_indus_rets`` is a DataFrame, the top ``bestN_indux``
    industries per date are selected first and roughly
    ``bestN_stock / bestN_indux`` stocks are picked inside each selected
    industry, each weighted ``1 / bestN_stock``.  Otherwise the top
    ``bestN_stock`` *fraction* of stocks per date is equal-weighted.

    :param est_stock_rets: DataFrame (codes x dates) of predicted stock
        returns.
    :param bestN_stock: stock count (industry mode) or pool fraction
        (no-industry mode) to hold.
    :param est_indus_rets: optional DataFrame (industries x dates) of
        predicted industry returns.
    :param bestN_indux: number of industries to hold (industry mode).
    :return: DataFrame of per-date stock weights.
    """
    if isinstance(est_indus_rets, pd.DataFrame):
        # 0/1 flag matrix over the predicted-best industries per date.
        wt_indus = pd.DataFrame(np.zeros(np.shape(est_indus_rets)),
                                index=est_indus_rets.index,
                                columns=est_indus_rets.columns)
        # NOTE: .items() replaces .iteritems(), removed in pandas 2.0.
        for col, ser in est_indus_rets.items():
            ser = ser.sort_values(ascending=False)
            select_id = ser.index[0:bestN_indux]
            wt_indus.loc[select_id, col] = 1
        wt_indus.columns = pd.to_datetime(wt_indus.columns)

        # Attach each stock's second-level industry membership.
        si = get_factor(
            ['second_industry'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['second_industry']
        est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
        si.columns = pd.to_datetime(si.columns)

        # Align the three date axes.
        dl = sorted(set(est_indus_rets.columns)
                    & set(est_stock_rets.columns) & set(si.columns))

        bestN_stock_each_indus = int(bestN_stock / bestN_indux)
        wt_stock = pd.DataFrame()
        for date in dl:
            stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1)
            stock_bd = stock_bd.dropna(how='any')
            stock_bd.columns = ['ext_rets', 'second_indus']
            indus_wei = wt_indus[date]
            wei_tmp = pd.DataFrame()
            for name, group in stock_bd.groupby('second_indus'):
                if indus_wei[name] == 0:
                    # Industry not selected: zero weight for its stocks.
                    wei_ttmp = pd.DataFrame(np.zeros([group.shape[0], 1]),
                                            index=group.index,
                                            columns=[date])
                else:
                    if group.shape[0] <= bestN_stock_each_indus:
                        # Too few stocks: spread weight over the group.
                        # NOTE(review): the 1/(n_per_indus * group_size)
                        # scaling is preserved as-is — confirm intended.
                        wei_ttmp = (1 / (bestN_stock_each_indus * group.shape[0])) * \
                            pd.DataFrame(np.ones([group.shape[0], 1]),
                                         index=group.index,
                                         columns=[date])
                    else:
                        ranked = group['ext_rets'].sort_values(
                            ascending=False)
                        # BUG FIX: select the top bestN_stock_each_indus
                        # stocks instead of the hard-coded top 5.
                        select_id = ranked.index[0:bestN_stock_each_indus]
                        wei_ttmp = pd.DataFrame(
                            np.zeros([group.shape[0], 1]),
                            index=group.index, columns=[date])
                        wei_ttmp.loc[select_id, date] = 1 / bestN_stock
                wei_tmp = pd.concat([wei_tmp, wei_ttmp], axis=0)
            wei_tmp = wei_tmp / np.sum(wei_tmp)
            wt_stock = pd.concat([wt_stock, wei_tmp], axis=1)
        wt_stock = wt_stock.fillna(0)
    else:
        # No industry view: hold the top bestN_stock fraction per date.
        wt_stock = pd.DataFrame()
        for col, ser in est_stock_rets.items():
            wei_tmp = pd.DataFrame(index=est_stock_rets.index)
            ranked = ser.sort_values(ascending=False).dropna()
            s_num = int(len(ranked) * bestN_stock)
            # BUG FIX: slice the *sorted* series; the original sliced the
            # unsorted index, ignoring the return ranking entirely.
            select_id = ranked.index[0:s_num]
            wei_tmp.loc[select_id, col] = 1
            wei_tmp = wei_tmp.fillna(0)
            wei_tmp = wei_tmp / np.sum(wei_tmp)
            wt_stock = pd.concat([wt_stock, wei_tmp], axis=1)
    return wt_stock
def optimization(est_stock_rets, index_wei, est_indus_rets=None):
    """Solve per-date stock weights under industry-exposure constraints.

    :param est_stock_rets: DataFrame (codes x dates) of predicted stock
        returns.
    :param index_wei: DataFrame of the benchmark's industry weights.
    :param est_indus_rets: optional DataFrame of predicted industry
        returns.  If provided, industries are over/under-weighted versus
        the benchmark; otherwise exposure is industry-neutral.
    :return: DataFrame of per-date stock weights.
    """
    if isinstance(est_indus_rets, pd.DataFrame):
        # Over/under-weight mode: second-level industry membership.
        si = get_factor(
            ['second_industry'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['second_industry']
        est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
        si.columns = pd.to_datetime(si.columns)
        dl = sorted(set(est_indus_rets.columns)
                    & set(est_stock_rets.columns) & set(index_wei.columns))
    else:
        # Industry-neutral mode: first-level (中信) industry membership.
        si = get_factor(
            ['industry_zx'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['industry_zx']
        # BUG FIX: the original also did
        #   est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
        # in this branch, which raises AttributeError because
        # est_indus_rets is None here.
        si.columns = pd.to_datetime(si.columns)
        dl = sorted(set(est_stock_rets.columns) & set(index_wei.columns))

    total_wei = pd.DataFrame()
    for date in dl:
        if isinstance(est_indus_rets, pd.DataFrame):
            # Tilt the benchmark industry weights toward predicted winners.
            indus_bd = pd.concat([est_indus_rets[date], index_wei[date]],
                                 axis=1)
            indus_bd = indus_bd.dropna(how='any')
            indus_bd.columns = ['est_rets', 'index_wet']
            up_quantile = 25    # expand weights of the top quantile
            expand_beta = 2
            dow_quantile = 70   # shrink weights below this quantile
            shrink_beta = 0.5
            indus_exposure = adjust_weight(indus_bd, up_quantile,
                                           expand_beta, dow_quantile,
                                           shrink_beta)
        else:
            # Neutral mode: mirror the index's industry exposure.
            indus_exposure = index_wei[date]

        # Cross section of predicted returns plus industry dummies.
        stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1)
        stock_bd = stock_bd.dropna(how='any')
        stock_bd.columns = ['ext_rets', 'second_indus']
        stock_bd = pd.get_dummies(stock_bd)
        stock_bd.columns = ['ext_rets'] + [
            col.split('_')[-1] for col in stock_bd.columns
            if 'second' in col
        ]

        n = len(stock_bd)
        c = stock_bd['ext_rets'].values.reshape(1, n)
        x0 = np.ones(n) / n  # start from the equal-weight portfolio
        bnds = tuple((0, 1) for _ in range(n))  # long-only bounds
        stock_num = 100

        # Equality constraints: industry exposures + full investment.
        A_eq1 = stock_bd[list(stock_bd.columns[1:])].values.T
        A_eq = np.r_[A_eq1, np.ones([1, A_eq1.shape[1]])]
        # Align exposures to the dummy columns; absent industries get 0.
        indus_exposure = indus_exposure.reindex(list(stock_bd.columns[1:]))
        indus_exposure = indus_exposure.fillna(0)
        b_eq = np.r_[indus_exposure.values.reshape(-1, 1),
                     np.array(1).reshape(1, 1)]

        args = [stock_num, A_eq, b_eq]
        cons = get_con(args)
        res = minimize(target_fun, x0, args=c, bounds=bnds,
                       constraints=cons)
        wei = res.x / np.sum(res.x)
        cur_wt = pd.DataFrame(wei, index=stock_bd.index, columns=[date])
        total_wei = pd.concat([total_wei, cur_wt], axis=1)
    return total_wei
def optimization_equalwei(est_stock_rets, est_indus_rets, bestN): # 等权配置预测表现好的行业 wt_indus = pd.DataFrame(np.zeros(np.shape(est_indus_rets)), index=est_indus_rets.index, columns=est_indus_rets.columns) # 逐列遍历 for col, itmes in est_indus_rets.iteritems(): itmes = itmes.sort_values(ascending=False) select_id = itmes.index[0:bestN] wt_indus.loc[select_id, col] = 1 / bestN wt_indus.columns = pd.to_datetime(wt_indus.columns) # 把二级行业与股票预测数据合并 si = get_factor( ['second_industry'], basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵' )['second_industry'] est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns) si.columns = pd.to_datetime(si.columns) # 日期对齐 dl = list( set(est_indus_rets.columns) & set(est_stock_rets.columns) & set(si.columns)) dl = sorted(dl) stock_num = 100 for date in dl: date = dl[0] stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1) stock_bd = stock_bd.dropna(how='any') stock_bd.columns = ['ext_rets', 'second_indus'] stock_bd = pd.get_dummies(stock_bd) stock_bd.columns = ['ext_rets'] + [ col.split('_')[-1] for col in stock_bd.columns if 'second' in col ] c = stock_bd['ext_rets'].values.reshape(len(stock_bd), 1) x0 = np.zeros(len(stock_bd)) / len(stock_bd) # 初始值 # c.shape bnds = [] for j in range(0, len(stock_bd)): bnds.append((0.0, 1.0)) bnds = tuple(bnds) A_eq = stock_bd[list(stock_bd.columns[1:])].values.T #A_eq.shape # 对齐 indus_exposure = wt_indus[date] indus_exposure = indus_exposure.reindex(list(stock_bd.columns[1:])) indus_exposure = indus_exposure.fillna(0) b_eq = indus_exposure.values.reshape(-1, 1) # b_eq.shape args = [stock_num, A_eq, b_eq] cons = get_con(args) res = minimize(target_fun, x0, args=c, bounds=bnds, constraints=cons, options={'disp': True}) wei = res.x print(res.success) res.status res.message res.nit wei = wei / np.sum(wei) cur_wt = pd.DataFrame(wei, index=stock_bd.index, columns=[date])