def out_sample_perf_c(pnl_df_out, way=1):
    """Score an out-of-sample pnl series by annualized Sharpe.

    NOTE(review): an identical redefinition of this function appears later
    in the file and shadows this one — consider deleting one copy.
    """
    signed_pnl = pnl_df_out if way == 1 else -pnl_df_out
    sharpe_out = bt.AZ_Sharpe_y(signed_pnl)
    # passes when the direction-adjusted Sharpe clears 0.8
    return sharpe_out > 0.8, round(sharpe_out * way, 2)
def out_sample_perf_c(pnl_df_out, way=1):
    """Return ``(out_condition, sharpe)`` for an out-of-sample pnl series.

    When ``way == -1`` the pnl is negated before the Sharpe is computed, so
    the statistic reflects the actually-traded direction; the condition
    passes when that Sharpe exceeds 0.8.  The returned Sharpe is multiplied
    back by ``way`` and rounded to 2 decimals.

    Parameters:
      pnl_df_out -- daily out-of-sample pnl (pandas Series)
      way        -- trade direction, 1 or -1

    (Cleanup: removed the large block of dead, commented-out
    AZ_Rolling_sharpe code that previously obscured this function.)
    """
    if way == 1:
        sharpe_out = bt.AZ_Sharpe_y(pnl_df_out)
    else:
        sharpe_out = bt.AZ_Sharpe_y(-pnl_df_out)
    out_condition = sharpe_out > 0.8
    return out_condition, round(sharpe_out * way, 2)
def filter_all(cut_date, pos_df_daily, pct_n, if_return_pnl=False, if_only_long=False):
    """Compute in-sample / out-of-sample statistics for a daily position frame.

    The pnl series is split at ``cut_date``: the earlier part drives the
    in-sample stats (rolling-Sharpe cut points, pot, leverage ratio, fit
    ratio, IC); the later part is scored by ``out_sample_perf_c`` using the
    trade direction implied by the in-sample middle rolling-Sharpe value.

    Parameters:
      cut_date      -- timestamp separating in-sample (<) from out-of-sample (>=)
      pos_df_daily  -- daily positions, one column per stock
      pct_n         -- daily returns aligned with the positions
      if_return_pnl -- when True, append the full pnl series to the result tuple
      if_only_long  -- when True, only positive positions contribute to pnl and
                       only the upper in-sample condition is used

    Returns: (in_condition, out_condition, ic, sharpe_u, sharpe_m, sharpe_d,
    pot_in, fit_ratio, leve_ratio, sp_in, sharpe_q_out[, pnl_df]).
    """
    if if_only_long:
        # long book only: drop the short side before computing pnl
        pnl_df = (pos_df_daily[pos_df_daily > 0] * pct_n).sum(axis=1)
        pnl_df = pnl_df.replace(np.nan, 0)
    else:
        pnl_df = (pos_df_daily * pct_n).sum(axis=1)
        pnl_df = pnl_df.replace(np.nan, 0)
    # pnl_df = pd.Series(pnl_df)
    # in-sample performance
    return_in = pct_n[pct_n.index < cut_date]
    pnl_df_in = pnl_df[pnl_df.index < cut_date]
    asset_df_in = pnl_df_in.cumsum()
    last_asset_in = asset_df_in.iloc[-1]
    pos_df_daily_in = pos_df_daily[pos_df_daily.index < cut_date]
    pot_in = AZ_Pot(pos_df_daily_in, last_asset_in)
    leve_ratio = AZ_Leverage_ratio(asset_df_in)
    if leve_ratio < 0:
        # negative leverage ratio treated as a pass — sentinel value 100
        leve_ratio = 100
    # cut points (0.3 / 0.5 / 0.7 — presumably quantiles) of the 1-year
    # rolling Sharpe; TODO confirm against AZ_Rolling_sharpe's contract
    sharpe_q_in_df = bt.AZ_Rolling_sharpe(pnl_df_in, roll_year=1, year_len=250,
                                          min_periods=1,
                                          cut_point_list=[0.3, 0.5, 0.7],
                                          output=False)
    sp_in = bt.AZ_Sharpe_y(pnl_df_in)
    fit_ratio = bt.AZ_fit_ratio(pos_df_daily_in, return_in)
    # NOTE(review): IC is computed against the FULL-sample returns `pct_n`,
    # not the in-sample slice `return_in` — confirm this is intentional.
    ic = round(
        bt.AZ_Normal_IC(pos_df_daily_in, pct_n, min_valids=None, lag=0).mean(), 6)
    sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d = sharpe_q_in_df.values
    in_condition_u = sharpe_q_in_df_u > 0.9 and leve_ratio > 1
    in_condition_d = sharpe_q_in_df_d < -0.9 and leve_ratio > 1
    # two-sided book vs long-only book
    if if_only_long:
        in_condition = in_condition_u
    else:
        in_condition = in_condition_u | in_condition_d
    # trade direction follows the sign of the middle rolling-Sharpe cut point
    if sharpe_q_in_df_m > 0:
        way = 1
    else:
        way = -1
    # out-of-sample performance
    pnl_df_out = pnl_df[pnl_df.index >= cut_date]
    out_condition, sharpe_q_out = out_sample_perf_c(pnl_df_out, way=way)
    if if_return_pnl:
        return in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
            fit_ratio, leve_ratio, sp_in, sharpe_q_out, pnl_df
    else:
        return in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
            fit_ratio, leve_ratio, sp_in, sharpe_q_out
def plot_all_alpha():
    """Plot and e-mail the cumulative pnl of every alpha in the corr-test file."""
    all_pnl_df = pd.read_csv('/mnt/mfs/AATST/corr_tst_pnls', sep='|',
                             index_col=0, parse_dates=True)
    for col in all_pnl_df.columns:
        # drop zero / missing stretches so the curve starts at live data
        series = all_pnl_df[col].replace(0, np.nan).fillna(method='ffill').dropna()
        plot_send_result(series, bt.AZ_Sharpe_y(series), f'Alpha_{col}')
def pnl_sum(file_name):
    """Sum every pnl csv under the given tmp directory and send a summary plot."""
    pnl_root_path = '/mnt/mfs/dat_whs/tmp_pnl_file/{}'.format(file_name)
    all_pnl_df = pd.DataFrame()
    for pnl_file in os.listdir(pnl_root_path):
        one_pnl = pd.read_csv(os.path.join(pnl_root_path, pnl_file),
                              index_col=0, header=None)
        all_pnl_df = all_pnl_df.add(one_pnl, axis=1, fill_value=0)
    plot_send_result(all_pnl_df, bt.AZ_Sharpe_y(all_pnl_df), 'top_100_sharpe_pnl')
def bkt_fun(self, pnl_save_path, a_n, i):
    """Re-run one stored backtest row, report it, persist its pnl and return
    the sign-adjusted mix factor.

    Parameters:
      pnl_save_path -- directory where the pnl series is pickled
      a_n           -- result DataFrame; row ``i`` holds the stored stats
      i             -- index of the row to re-run

    Returns the mix factor, negated when the stored middle Sharpe is <= 0.

    FIX: the original if/else wrote the identical pickle in BOTH branches,
    so the os.path.exists check never prevented an overwrite; the branches
    are collapsed into one write plus the original 'file exist!' message.
    NOTE(review): files are written with to_pickle but named .csv — confirm
    downstream readers expect pickle format.
    """
    x, key, fun_name, name1, name2, name3, filter_fun_name, sector_name, \
        con_in, con_out_1, con_out_2, con_out_3, con_out_4, ic, \
        sp_u, sp_m, sp_d, pot_in, fit_ratio, leve_ratio, \
        sp_in, sp_out_1, sp_out_2, sp_out_3, sp_out_4 = a_n.loc[i]
    mix_factor, con_in_c, con_out_c, ic_c, sp_u_c, sp_m_c, sp_d_c, pot_in_c, fit_ratio_c, leve_ratio_c, \
        sp_in_c, sp_out_c, pnl_df_c = self.single_test(fun_name, name1, name2, name3)
    plot_send_result(pnl_df_c, bt.AZ_Sharpe_y(pnl_df_c), '{}, key={}'.format(i, key))
    print('***************************************************')
    print('now {}\'s is running, key={}, {}, {}, {}, {}'.format(
        i, key, fun_name, name1, name2, name3))
    print(con_in_c, con_out_c, ic_c, sp_u_c, sp_m_c, sp_d_c, pot_in_c,
          fit_ratio_c, leve_ratio_c, sp_out_c)
    print(con_in, con_out_1, ic, sp_u, sp_m, sp_d, pot_in, fit_ratio,
          leve_ratio, sp_out_1)
    save_file = os.path.join(pnl_save_path, '{}|{}|{}.csv'.format(x, key, fun_name))
    already_exists = os.path.exists(save_file)
    if sp_m > 0:
        pnl_to_save, factor_to_return = pnl_df_c, mix_factor
    else:
        # stored middle Sharpe is non-positive: trade (and store) the flipped side
        pnl_to_save, factor_to_return = -pnl_df_c, -mix_factor
    pnl_to_save.to_pickle(save_file)
    if already_exists:
        print('file exist!')
    return factor_to_return
def get_corr_matrix(pos_file_list, cut_date=None):
    """Build index-hedged pnl series for each position file and return
    (pnl frame, correlation matrix), optionally truncated after cut_date."""
    return_df = AZ_Load_csv(
        '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv').astype(float)
    # hedge with an equal-weight blend of the 000300 and 000905 indexes
    index_df_1 = load_index_data(return_df.index, '000300').fillna(0)
    index_df_2 = load_index_data(return_df.index, '000905').fillna(0)
    return_df = return_df.sub(0.5 * index_df_1 + 0.5 * index_df_2, axis=0)
    sum_pnl_df = pd.DataFrame()
    for pos_file_name in pos_file_list:
        pos_df = AZ_Load_csv('/mnt/mfs/AAPOS/{}'.format(pos_file_name))
        daily_pnl = (pos_df.shift(2) * return_df).sum(axis=1)
        daily_pnl = daily_pnl.replace(0, np.nan).fillna(method='ffill').dropna()
        daily_pnl.name = pos_file_name
        sum_pnl_df = pd.concat([sum_pnl_df, daily_pnl], axis=1)
        plot_send_result(daily_pnl, bt.AZ_Sharpe_y(daily_pnl), pos_file_name)
    if cut_date is not None:
        sum_pnl_df = sum_pnl_df[sum_pnl_df.index > cut_date]
    return sum_pnl_df, sum_pnl_df.corr()
def filter_time_para_fun(time_para_dict, pos_df_daily, adj_return, if_return_pnl, if_only_long):
    """Evaluate a position frame over several (begin, cut, end…) windows.

    For each entry of ``time_para_dict`` — a mapping of window name to
    [begin, cut, end_1, end_2, end_3, end_4] timestamps — compute in-sample
    stats on [begin, cut) and four out-of-sample Sharpe checks on
    [cut, end_k).  Windows whose positions are non-zero on fewer than 10%
    of in-sample days are skipped.

    Parameters:
      time_para_dict -- OrderedDict of window-name -> 6 timestamps
      pos_df_daily   -- daily positions, one column per stock
      adj_return     -- daily (hedge-adjusted) returns aligned to positions
      if_return_pnl  -- when True, append the full pnl series to each row
      if_only_long   -- when True, only positive positions contribute

    Returns an OrderedDict: window name -> list of conditions/statistics.
    """
    if if_only_long:
        # long book only: drop the short side before computing pnl
        pnl_df = (pos_df_daily[pos_df_daily > 0] * adj_return).sum(axis=1)
        pnl_df = pnl_df.replace(np.nan, 0)
    else:
        pnl_df = (pos_df_daily * adj_return).sum(axis=1)
        pnl_df = pnl_df.replace(np.nan, 0)
    result_dict = OrderedDict()
    for time_key in time_para_dict.keys():
        begin_para, cut_para, end_para_1, end_para_2, end_para_3, end_para_4 = time_para_dict[
            time_key]
        # in-sample index
        sample_in_index = (adj_return.index >= begin_para) & (adj_return.index < cut_para)
        # out-of-sample indexes (all start at the cut date, different horizons)
        sample_out_index_1 = (adj_return.index >= cut_para) & (adj_return.index < end_para_1)
        sample_out_index_2 = (adj_return.index >= cut_para) & (adj_return.index < end_para_2)
        sample_out_index_3 = (adj_return.index >= cut_para) & (adj_return.index < end_para_3)
        sample_out_index_4 = (adj_return.index >= cut_para) & (adj_return.index < end_para_4)
        # in-sample performance
        pos_df_daily_in = pos_df_daily[sample_in_index]
        # skip windows with positions on < 10% of in-sample days
        if len(pos_df_daily_in.abs().sum(axis=1).replace(
                0, np.nan).dropna()) / len(pos_df_daily_in) < 0.1:
            continue
        adj_return_in = adj_return[sample_in_index]
        pnl_df_in = pnl_df[sample_in_index]
        asset_df_in = pnl_df_in.cumsum()
        last_asset_in = asset_df_in.iloc[-1]
        pot_in = AZ_Pot(pos_df_daily_in, last_asset_in)
        leve_ratio = AZ_Leverage_ratio(asset_df_in)
        if leve_ratio < 0:
            # negative leverage ratio treated as a pass — sentinel value 100
            leve_ratio = 100
        sharpe_q_in_df = bt.AZ_Rolling_sharpe(pnl_df_in, roll_year=1, year_len=250,
                                              min_periods=1,
                                              cut_point_list=[0.3, 0.5, 0.7],
                                              output=False)
        sharpe_q_in_df = round(sharpe_q_in_df, 4)
        sp_in = bt.AZ_Sharpe_y(pnl_df_in)
        fit_ratio = bt.AZ_fit_ratio(pos_df_daily_in, adj_return_in)
        ic = round(
            bt.AZ_Normal_IC(pos_df_daily_in, adj_return_in, min_valids=None,
                            lag=0).mean(), 6)
        sp_in_u, sp_in_m, sp_in_d = sharpe_q_in_df.values
        in_condition_u = sp_in_u > 0.9 and leve_ratio > 1
        in_condition_d = sp_in_d < -0.9 and leve_ratio > 1
        # two-sided book vs long-only book
        if if_only_long:
            in_condition = in_condition_u
        else:
            in_condition = in_condition_u | in_condition_d
        # trade direction follows the sign of the middle rolling-Sharpe value
        if sp_in_m > 0:
            way = 1
        else:
            way = -1
        # out-of-sample performance over the four horizons
        pnl_df_out_1 = pnl_df[sample_out_index_1]
        pnl_df_out_2 = pnl_df[sample_out_index_2]
        pnl_df_out_3 = pnl_df[sample_out_index_3]
        pnl_df_out_4 = pnl_df[sample_out_index_4]
        out_condition_1, sp_out_1 = out_sample_perf_c(pnl_df_out_1, way=way)
        out_condition_2, sp_out_2 = out_sample_perf_c(pnl_df_out_2, way=way)
        out_condition_3, sp_out_3 = out_sample_perf_c(pnl_df_out_3, way=way)
        out_condition_4, sp_out_4 = out_sample_perf_c(pnl_df_out_4, way=way)
        if if_return_pnl:
            result_dict[time_key] = [
                in_condition, out_condition_1, out_condition_2, out_condition_3,
                out_condition_4, ic, sp_in_u, sp_in_m, sp_in_d, pot_in,
                fit_ratio, leve_ratio, sp_in, sp_out_1, sp_out_2, sp_out_3,
                sp_out_4, pnl_df
            ]
        else:
            result_dict[time_key] = [
                in_condition, out_condition_1, out_condition_2, out_condition_3,
                out_condition_4, ic, sp_in_u, sp_in_m, sp_in_d, pot_in,
                fit_ratio, leve_ratio, sp_in, sp_out_1, sp_out_2, sp_out_3,
                sp_out_4
            ]
    return result_dict
def pos_sum(all_use_factor, hold_time):
    """Pick the top stored factor combinations, rebuild each mix factor, sum
    them (sign-adjusted by stored sp_m) and backtest the combined position.

    NOTE(review): relies on many module-level names (xnms, xinx, sector_df,
    suspendday_df, limit_buy_sell_df, cut_date, return_choose, index_df, lag,
    if_only_long, mul_fun/sub_fun/add_fun, …) defined elsewhere in the file.
    NOTE(review): `filter_all` is called here with `index_df`, `if_hedge` and
    `hedge_ratio` arguments that the `filter_all` defined earlier in this
    file does not accept — confirm which overload is actually in scope.

    Parameters:
      all_use_factor -- result log table, one row per factor combination
      hold_time      -- holding period forwarded to deal_mix_factor

    Returns (pnl_df, sum_pos_df) for the combined position.
    """
    # rank candidates by |sp_u| descending
    all_use_factor['sort_line'] = all_use_factor['sp_u'].abs()
    all_use_factor.sort_values(by='sort_line', inplace=True, ascending=False)
    all_use_factor.drop(columns='sort_line', inplace=True)
    # drop any combination that uses the volume_count_down_p60d factor
    filter_cond = all_use_factor.apply(
        lambda x: not ('volume_count_down_p60d' in set(x)), axis=1)
    all_use_factor = all_use_factor[filter_cond]
    # keep the 10 best rows with |pot_in| > 20
    a = all_use_factor[all_use_factor.pot_in.abs() > 20]
    a = a.iloc[:10]
    b = a.copy()
    # buy_sell: +1 when stored middle Sharpe is positive, else -1
    b['buy_sell'] = (a['sp_m'] > 0).astype(int).replace(0, -1)
    print(b['con_out'].sum() / len(a), len(a))
    factor_info = b[['fun_name', 'name1', 'name2', 'name3', 'buy_sell']].replace(0, -1)
    # persist the selection as the live config
    config = dict()
    config['factor_info'] = factor_info
    pd.to_pickle(config, '/mnt/mfs/alpha_whs/config01.pkl')
    sum_factor_df = pd.DataFrame(columns=xnms, index=xinx)
    for i in a.index:
        key, fun_name, name1, name2, name3, filter_fun_name, sector_name, con_in, con_out, ic, sp_u, sp_m, sp_d, \
            pot_in, fit_ratio, leve_ratio, sp_out = a.loc[i]
        print('***************************************************')
        print('now {}\'s is running, key={}, {}, {}, {}, {}'.format(
            i, key, fun_name, name1, name2, name3))
        # rebuild the 3-factor combination function by name
        fun_set = [mul_fun, sub_fun, add_fun]
        mix_fun_set = create_fun_set_2(fun_set)
        fun = mix_fun_set[fun_name]
        factor_set = load_part_factor(sector_name, xnms, xinx, [name1, name2, name3])
        choose_1 = factor_set[name1]
        choose_2 = factor_set[name2]
        choose_3 = factor_set[name3]
        mix_factor = fun(choose_1, choose_2, choose_3)
        # require positions on > 10% of days before accepting the factor
        if len(mix_factor.abs().sum(axis=1).replace(
                0, np.nan).dropna()) / len(mix_factor) > 0.1:
            pos_daily = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                        limit_buy_sell_df, hold_time, lag, if_only_long)
            in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
                fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, pos_daily, return_choose,
                                                                         index_df, if_hedge=True, hedge_ratio=1,
                                                                         if_return_pnl=True,
                                                                         if_only_long=if_only_long)
            plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), '{}, key={}'.format(i, key))
            # stored stats vs freshly recomputed stats, for eyeballing drift
            print(con_in, con_out, ic, sp_u, sp_m, sp_d, pot_in, fit_ratio,
                  leve_ratio, sp_out)
            print(in_condition, out_condition, ic, sharpe_q_in_df_u,
                  sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, fit_ratio,
                  leve_ratio, sharpe_q_out)
            # add (or subtract, per stored direction) into the combined factor
            if sp_m > 0:
                sum_factor_df = sum_factor_df.add(mix_factor, fill_value=0)
            else:
                sum_factor_df = sum_factor_df.add(-mix_factor, fill_value=0)
        else:
            print('pos not enough!')
    # backtest the combined factor
    sum_pos_df = deal_mix_factor(sum_factor_df, sector_df, suspendday_df,
                                 limit_buy_sell_df, hold_time, lag, if_only_long).round(14)
    in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
        fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, sum_pos_df, return_choose,
                                                                 index_df, if_hedge=True, hedge_ratio=1,
                                                                 if_return_pnl=True,
                                                                 if_only_long=if_only_long)
    print(in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
          sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sharpe_q_out)
    return pnl_df, sum_pos_df
xinx = sector_df.index # suspend or limit up_dn suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx) # return return_choose = pd.read_table( '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv', sep='|', index_col=0).astype(float) return_choose.index = pd.to_datetime(return_choose.index) return_choose = return_choose.reindex(columns=xnms, index=xinx, fill_value=0) # index data index_df = load_index_data(xinx, index_name) all_use_factor = pd.read_table(log_result, sep='|', header=None) all_use_factor.columns = [ 'key', 'fun_name', 'name1', 'name2', 'name3', 'filter_fun_name', 'sector_name', 'con_in', 'con_out', 'ic', 'sp_u', 'sp_m', 'sp_d', 'pot_in', 'fit_ratio', 'leve_ratio', 'sp_out' ] # 生成pnl文件 # create_pnl_file(all_use_factor, file_name, hold_time) # 将生成的pnl文件组合 pnl_df, sum_pos_df = pos_sum(all_use_factor, hold_time) plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'sum_pos_plot')
# NOTE(review): mid-script fragment — `market_top_n`, `root_path`,
# `load_index_data` and `plot_send_result` come from earlier in the file.
market_top_n.dropna(how='all', axis='columns', inplace=True)
xnms = market_top_n.columns
xinx = market_top_n.index
return_df = bt.AZ_Load_csv(os.path.join(root_path, 'EM_Funda/DERIVED_14/aadj_r.csv')).astype(float)
# signal: 1 on days a stock fell more than 9.7% (near limit-down), else 0
signal_df = (return_df < -0.097).astype(int)
signal_df = signal_df.reindex(columns=xnms, index=xinx, fill_value=1)
# diff < 0 marks the 1 -> 0 transition, i.e. the first day AFTER the drop
signal_df = (signal_df.fillna(0).diff() < 0).astype(int)
signal_df.replace(0, np.nan, inplace=True)
# hold each entry for at most 5 further trading days
pos_df = signal_df.fillna(method='ffill', limit=5)
# pos_df = pos_daily_fun(signal_df, n=20)
# scale to unit gross exposure; cap any single name at 10%
pos_df = pos_df.div(pos_df.abs().sum(axis=1).replace(0, np.nan), axis=0)
pos_df[pos_df > 0.1] = 0.1
# hedge: equal-weight blend of the 000300 and 000905 index returns
index_df_1 = load_index_data('000300').fillna(0)
index_df_2 = load_index_data('000905').fillna(0)
hedge_df = 0.5 * index_df_1 + 0.5 * index_df_2
return_choose = bt.AZ_Load_csv(os.path.join(root_path, 'EM_Funda/DERIVED_14/aadj_r.csv'))
return_choose = return_choose.reindex(index=xinx, columns=xnms)
return_choose = return_choose.sub(hedge_df, axis=0)
# short-side pnl (note the leading minus) with a 0.3% turnover cost;
# positions are lagged 2 days before meeting returns
pnl_df = (-return_choose * pos_df.shift(2)).sum(axis=1) - 0.003 * pos_df.shift(2).fillna(0).diff().abs().sum(axis=1)
# pnl_df = (-return_choose * pos_df.shift(2)).sum(axis=1)
print(1)
plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'aaa_b')
import loc_lib.shared_tools.back_test as bt


def plot_send_result(pnl_df, sharpe_ratio, subject):
    """Plot the cumulative pnl curve, save it as a png and e-mail it.

    Parameters:
      pnl_df       -- daily pnl series; its cumulative sum is plotted
      sharpe_ratio -- annualized Sharpe, shown in the legend
      subject      -- e-mail subject, also used as the png file name
    """
    figure_save_path = '/mnt/mfs/dat_whs/tmp_figure'
    plt.figure(figsize=[16, 8])
    # BUG FIX: was label='sharpe_ratio='.format(sharpe_ratio) — calling
    # .format() on a string with no {} placeholder silently discards its
    # argument, so the legend never showed the Sharpe value.
    plt.plot(pnl_df.index, pnl_df.cumsum(),
             label='sharpe_ratio={}'.format(sharpe_ratio))
    plt.legend()
    plt.savefig(os.path.join(figure_save_path, '{}.png'.format(subject)))
    text = ''
    to = ['*****@*****.**']
    filepath = [os.path.join(figure_save_path, '{}.png'.format(subject))]
    send_email.send_email(text, to, filepath, subject)


if __name__ == '__main__':
    pos_file_name = 'RZJNORMAL10.pos'
    pos_df = bt.AZ_Load_csv(
        '/mnt/mfs/AAPOS/{}'.format(pos_file_name)).iloc[-100:]
    xnms = pos_df.columns
    xinx = pos_df.index
    return_df = bt.AZ_Load_csv(
        '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv').astype(float)
    return_df = return_df.reindex(index=xinx, columns=xnms)
    # positions are lagged 2 days before being multiplied by returns
    pnl_df = (pos_df.shift(2) * return_df).sum(axis=1)
    # NOTE(review): the same pnl is e-mailed twice (subjects 'mix_factor' and
    # the pos file name) — confirm both sends are intentional.
    plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'mix_factor')
    sharpe_ratio = bt.AZ_Sharpe_y(pnl_df)
    plot_send_result(pnl_df, sharpe_ratio, pos_file_name)
def pos_sum_c(self, data, time_para, result_file_name, pot_in_num, leve_ratio_num, sp_in, ic_num, fit_ratio):
    """Filter stored results by thresholds, rebuild the surviving factors in
    parallel, sum them and backtest the combined position.

    Parameters (thresholds compare against absolute values):
      data             -- result table with a 'time_para' column
      time_para        -- which window to use ('time_para_1' .. 'time_para_6')
      result_file_name -- subdirectory for cached per-factor pnl files
      pot_in_num, leve_ratio_num, sp_in, ic_num, fit_ratio -- filter thresholds

    Returns (sum_pos_df, pnl_df) for the combined factor.

    NOTE(review): `time_para_dict` is assigned by key below without a local
    initialisation — this only works if a module-level `time_para_dict`
    already exists, otherwise the first assignment raises NameError; confirm,
    and consider initialising an OrderedDict here.
    NOTE(review): `bkt_fun` is submitted to the pool as a bare name although
    a method of that name takes `self` — verify a module-level wrapper exists.
    NOTE(review): the tuple unpacking from `filter_all` rebinds the parameter
    names `ic`/`pot_in`/`fit_ratio`/`leve_ratio`/`sp_in` — harmless after the
    filter step, but confusing; consider renaming.
    """
    # each entry: [in-sample begin, in/out cut date, out-of-sample end]
    time_para_dict['time_para_1'] = [
        pd.to_datetime('20110101'),
        pd.to_datetime('20150101'),
        pd.to_datetime('20150701')
    ]
    time_para_dict['time_para_2'] = [
        pd.to_datetime('20120101'),
        pd.to_datetime('20160101'),
        pd.to_datetime('20160701')
    ]
    time_para_dict['time_para_3'] = [
        pd.to_datetime('20130601'),
        pd.to_datetime('20170601'),
        pd.to_datetime('20171201')
    ]
    time_para_dict['time_para_4'] = [
        pd.to_datetime('20140601'),
        pd.to_datetime('20180601'),
        pd.to_datetime('20181001')
    ]
    time_para_dict['time_para_5'] = [
        pd.to_datetime('20140701'),
        pd.to_datetime('20180701'),
        pd.to_datetime('20181001')
    ]
    time_para_dict['time_para_6'] = [
        pd.to_datetime('20140801'),
        pd.to_datetime('20180801'),
        pd.to_datetime('20181001')
    ]
    data_n = data[data['time_para'] == time_para]
    begin_date, cut_date, end_date = time_para_dict[time_para]
    # keep rows whose in-sample stats clear every threshold (absolute values)
    a_n = data_n[(data_n['ic'].abs() > ic_num)
                 & (data_n['pot_in'].abs() > pot_in_num)
                 & (data_n['leve_ratio'].abs() > leve_ratio_num)
                 & (data_n['sp_in'].abs() > sp_in)
                 & (data_n['fit_ratio'].abs() > fit_ratio)]
    sum_factor_df = pd.DataFrame()
    pnl_save_path = '/mnt/mfs/dat_whs/data/mix_factor_pnl/' + result_file_name
    bt.AZ_Path_create(pnl_save_path)
    result_list = []
    pool = Pool(20)
    for i in a_n.index:
        # bkt_fun(pnl_save_path, a_n, i)
        result_list.append(
            pool.apply_async(bkt_fun, args=(
                pnl_save_path,
                a_n,
                i,
            )))
    pool.close()
    pool.join()
    # sum the (sign-adjusted) mix factors returned by the workers
    for res in result_list:
        sum_factor_df = sum_factor_df.add(res.get(), fill_value=0)
    # positions lag the factor by 2 days
    sum_pos_df = self.deal_mix_factor(sum_factor_df).shift(2)
    in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
        fit_ratio, leve_ratio, sp_in, sharpe_q_out, pnl_df = filter_all(cut_date, sum_pos_df,
                                                                        main_model.return_choose,
                                                                        if_return_pnl=True,
                                                                        if_only_long=False)
    print(in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
          sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sp_in, sharpe_q_out)
    plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'mix_factor')
    return sum_pos_df, pnl_df