def plot_save_fun(company, tmp_df, return_df, locked_df, hold_time_list,
                  index_df, now_time, if_hedge=False):
    """Draw one cumulative-PnL curve per holding period and optionally mail it.

    For every entry of ``hold_time_list`` a daily position frame is built
    with ``deal_mix_factor`` (sector argument ``None``, lag ``2``,
    ``if_only_long=False``). Its daily PnL is accumulated and plotted on a
    single shared figure, which is always saved as ``company.png`` in the
    temporary figure directory. The saved figure is e-mailed only if at
    least one holding period produced ``bt.AZ_Sharpe_y(pnl) > 1``.

    Args:
        company: String used in the figure title and as the mail subject.
        tmp_df: Factor frame handed to ``deal_mix_factor``.
        return_df: Frame multiplied element-wise with the daily positions.
        locked_df: Passed through to ``deal_mix_factor``.
        hold_time_list: Iterable of holding periods to evaluate.
        index_df: Frame whose ``'000300'`` column supplies the hedge leg
            (only read when ``if_hedge`` is true).
        now_time: String appended to the figure title.
        if_hedge: When true, the PnL is the position PnL minus the
            index frame scaled by the summed daily position.
    """
    out_dir = '/mnt/mfs/dat_whs/tmp_figure/'
    figure_file = out_dir + 'company.png'

    plt.figure(figsize=[16, 8])
    need_mail = False
    for hold_time in hold_time_list:
        daily_pos = deal_mix_factor(tmp_df, None, locked_df, hold_time, 2,
                                    if_only_long=False)
        if not if_hedge:
            pnl_df = (daily_pos * return_df).sum(axis=1)
        else:
            hedge_df = index_df.mul(daily_pos.sum(axis=1), axis=0)
            position_pnl = (daily_pos * return_df).sum(axis=1)
            # Subscription binds tighter than unary minus: the column is
            # selected first, then negated.
            pnl_df = -hedge_df.sub(position_pnl, axis=0)['000300']

        cum_pnl = pnl_df.cumsum()
        sharpe_y = bt.AZ_Sharpe_y(pnl_df)
        pot = AZ_Pot(daily_pos, cum_pnl.values[-1])
        if sharpe_y > 1:
            need_mail = True
            print(1)

        curve_label = 'hold_time={}, pot={}, sharpe={}'.format(
            hold_time, pot, sharpe_y)
        plt.plot(pnl_df.index, cum_pnl, label=curve_label)

    plt.legend()
    plt.title(company + '_' + now_time)
    plt.savefig(figure_file)
    plt.close()

    if need_mail:
        print('send_email')
        text = ''
        to = ['*****@*****.**']
        subject = company
        send_email.send_email(text, to, [figure_file], subject)
def create_pnl_file(all_use_factor, file_name, hold_time):
    """Re-evaluate the first three high-``pot_in`` factor combinations.

    Rows of ``all_use_factor`` with ``abs(pot_in) > 40`` are sorted by
    ``sp_u`` (ascending) and the first three are rebuilt: the three named
    factors are loaded, combined with the named mixing function, turned into
    daily positions and passed to ``filter_all``. The recomputed statistics
    and the stored statistics of the row are printed.

    The function creates ``<root_path>/tmp_pnl_file/<file_name>`` but does
    not write any file into it.

    Relies on module-level names that are not parameters: ``root_path``,
    ``xnms``, ``xinx``, ``sector_df``, ``suspendday_df``,
    ``limit_buy_sell_df``, ``lag``, ``if_only_long``, ``cut_date``,
    ``return_choose`` and ``index_df``.

    Args:
        all_use_factor: DataFrame whose first seven columns unpack to
            ``key, fun_name, name1, name2, name3, filter_fun_name,
            sector_name``; must also contain ``pot_in`` and ``sp_u``.
        file_name: Sub-directory name created under ``tmp_pnl_file``.
        hold_time: Holding period forwarded to ``deal_mix_factor``.
    """
    pnl_root_path = os.path.join(root_path, 'tmp_pnl_file/{}'.format(file_name))
    bt.AZ_Path_create(pnl_root_path)

    # sort_values() returns a new frame. Sorting the boolean-mask slice in
    # place (as before) operates on a temporary copy and triggers pandas'
    # SettingWithCopyWarning.
    strong_mask = all_use_factor['pot_in'].abs() > 40
    use_factor_pot = all_use_factor[strong_mask].sort_values(by='sp_u')

    # The set of mixing functions does not depend on the row.
    mix_fun_set = create_fun_set_2([mul_fun, sub_fun, add_fun])

    # Iterate the selected rows directly. The previous code fed index
    # *labels* to ``.iloc`` (positional), which only works when the frame
    # carries a default 0..n-1 index.
    for i, row in use_factor_pot.iloc[:3].iterrows():
        (key, fun_name, name1, name2, name3,
         filter_fun_name, sector_name, *result) = row
        print('**************************************')
        print('now {}\'s is running, key={}'.format(i, key))

        fun = mix_fun_set[fun_name]
        factor_set = load_part_factor(sector_name, xnms, xinx,
                                      [name1, name2, name3])
        mix_factor = fun(factor_set[name1], factor_set[name2],
                         factor_set[name3])

        daily_pos = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                    limit_buy_sell_df, hold_time, lag,
                                    if_only_long)
        (in_condition, out_condition, ic, sharpe_q_in_df_u,
         sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio,
         sharpe_q_out, pnl_df) = filter_all(
            cut_date, daily_pos, return_choose, index_df,
            if_hedge=True, hedge_ratio=1, if_return_pnl=True,
            if_only_long=if_only_long)

        print(in_condition, out_condition, ic, sharpe_q_in_df_u,
              sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, sharpe_q_out)
        print(result)
def pos_sum(all_use_factor, hold_time):
    """Combine the ten strongest factor mixes into one summed position.

    Steps, as implemented:

    1. ``all_use_factor`` is sorted **in place** by ``abs(sp_u)`` descending
       (the caller's frame is reordered).
    2. Rows containing the value ``'volume_count_down_p60d'`` are dropped,
       rows with ``abs(pot_in) > 20`` are kept, and the first ten remain.
    3. The selection (with a ``buy_sell`` column of +1/-1 derived from the
       sign of ``sp_m``) is pickled to ``/mnt/mfs/alpha_whs/config01.pkl``.
    4. Each selected mix is rebuilt; if more than 10% of its rows have a
       non-zero absolute sum it is evaluated with ``filter_all``, plotted via
       ``plot_send_result`` and added (sign-flipped when ``sp_m <= 0``) to a
       running factor sum.
    5. The summed factor is converted into positions and evaluated once more.

    Relies on module-level names that are not parameters: ``xnms``,
    ``xinx``, ``sector_df``, ``suspendday_df``, ``limit_buy_sell_df``,
    ``lag``, ``if_only_long``, ``cut_date``, ``return_choose`` and
    ``index_df``.

    Args:
        all_use_factor: DataFrame whose rows unpack into exactly 17 values
            (``key`` ... ``sp_out``) and that has the columns ``sp_u``,
            ``sp_m``, ``pot_in``, ``con_out``, ``fun_name``, ``name1``,
            ``name2`` and ``name3``.
        hold_time: Holding period forwarded to ``deal_mix_factor``.

    Returns:
        Tuple ``(pnl_df, sum_pos_df)``: the PnL returned by ``filter_all``
        for the summed position, and that summed position rounded to 14
        decimals.
    """
    all_use_factor['sort_line'] = all_use_factor['sp_u'].abs()
    all_use_factor.sort_values(by='sort_line', inplace=True, ascending=False)
    all_use_factor.drop(columns='sort_line', inplace=True)

    filter_cond = all_use_factor.apply(
        lambda x: not ('volume_count_down_p60d' in set(x)), axis=1)
    all_use_factor = all_use_factor[filter_cond]

    a = all_use_factor[all_use_factor.pot_in.abs() > 20]
    a = a.iloc[:10]
    b = a.copy()
    b['buy_sell'] = (a['sp_m'] > 0).astype(int).replace(0, -1)
    print(b['con_out'].sum() / len(a), len(a))

    factor_info = b[['fun_name', 'name1', 'name2', 'name3',
                     'buy_sell']].replace(0, -1)
    config = dict()
    config['factor_info'] = factor_info
    pd.to_pickle(config, '/mnt/mfs/alpha_whs/config01.pkl')

    # The set of mixing functions does not depend on the row.
    mix_fun_set = create_fun_set_2([mul_fun, sub_fun, add_fun])

    sum_factor_df = pd.DataFrame(columns=xnms, index=xinx)
    for i in a.index:
        (key, fun_name, name1, name2, name3, filter_fun_name, sector_name,
         con_in, con_out, ic, sp_u, sp_m, sp_d, pot_in, fit_ratio,
         leve_ratio, sp_out) = a.loc[i]
        print('***************************************************')
        print('now {}\'s is running, key={}, {}, {}, {}, {}'.format(
            i, key, fun_name, name1, name2, name3))

        fun = mix_fun_set[fun_name]
        factor_set = load_part_factor(sector_name, xnms, xinx,
                                      [name1, name2, name3])
        mix_factor = fun(factor_set[name1], factor_set[name2],
                         factor_set[name3])

        # Share of rows whose absolute factor sum is non-zero.
        active_rows = len(
            mix_factor.abs().sum(axis=1).replace(0, np.nan).dropna())
        if active_rows / len(mix_factor) > 0.1:
            pos_daily = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                        limit_buy_sell_df, hold_time, lag,
                                        if_only_long)
            # Recomputed statistics get their own names. Previously they
            # overwrote ic / pot_in / fit_ratio / leve_ratio from the stored
            # row, so the "stored" print below showed recomputed numbers.
            (new_con_in, new_con_out, new_ic, new_sp_u, new_sp_m, new_sp_d,
             new_pot_in, new_fit_ratio, new_leve_ratio, new_sp_out,
             pnl_df) = filter_all(
                cut_date, pos_daily, return_choose, index_df,
                if_hedge=True, hedge_ratio=1, if_return_pnl=True,
                if_only_long=if_only_long)
            plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df),
                             '{}, key={}'.format(i, key))
            # Stored statistics from the input row ...
            print(con_in, con_out, ic, sp_u, sp_m, sp_d, pot_in, fit_ratio,
                  leve_ratio, sp_out)
            # ... followed by the freshly recomputed ones.
            print(new_con_in, new_con_out, new_ic, new_sp_u, new_sp_m,
                  new_sp_d, new_pot_in, new_fit_ratio, new_leve_ratio,
                  new_sp_out)
            if sp_m > 0:
                sum_factor_df = sum_factor_df.add(mix_factor, fill_value=0)
            else:
                sum_factor_df = sum_factor_df.add(-mix_factor, fill_value=0)
        else:
            print('pos not enough!')

    sum_pos_df = deal_mix_factor(sum_factor_df, sector_df, suspendday_df,
                                 limit_buy_sell_df, hold_time, lag,
                                 if_only_long).round(14)
    (in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
     sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sharpe_q_out,
     pnl_df) = filter_all(
        cut_date, sum_pos_df, return_choose, index_df,
        if_hedge=True, hedge_ratio=1, if_return_pnl=True,
        if_only_long=if_only_long)
    print(in_condition, out_condition, ic, sharpe_q_in_df_u,
          sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio,
          sharpe_q_out)
    return pnl_df, sum_pos_df
def para_result(begin_date, end_date, sector_df, locked_df, index_df,
                hold_time, i, fun_name, name_1, name_2, name_3, sector_name,
                result, if_only_long=False):
    """Rebuild one factor mix, draw a 2x2 diagnostic figure and e-mail it.

    The three named factors are loaded and combined with the mixing function
    ``fun_name``; daily positions are built with ``lag = 1``. The figure
    shows: (1) raw, cost-adjusted and index-hedged cumulative PnL, (2) a
    histogram of the rolling Sharpe values, (3) the 250-window rolling mean
    of the summed daily position, (4) a title-only panel with recomputed
    statistics. The figure is saved as
    ``/mnt/mfs/dat_whs/result/tmp/<fun_name>|<name_1>|<name_2>|<name_3>.png``
    and sent with ``send_email.send_email``.

    Relies on the module-level name ``cut_date``.

    Args:
        begin_date, end_date: Date range forwarded to the data loaders.
        sector_df: Sector frame; its columns/index define the universe.
        locked_df: Passed through to ``deal_mix_factor``.
        index_df: Index frame; its first column is used for the hedged PnL.
        hold_time: Holding period forwarded to ``deal_mix_factor``.
        i: Identifier shown in the figure super-title.
        fun_name: Key into the mapping returned by ``create_fun_set_2``.
        name_1, name_2, name_3: Factor names to load and combine.
        sector_name: Sector identifier forwarded to ``load_part_factor``.
        result: Iterable of exactly ten stored statistics
            (``con_in`` ... ``sharpe_out``) shown in the histogram title.
        if_only_long: Forwarded to ``deal_mix_factor``.
            NOTE(review): ``filter_all`` below is called with a hard-coded
            ``if_only_long=False`` -- confirm that is intended.
    """
    cost_1 = 0.001
    lag = 1
    xnms, xinx = sector_df.columns, sector_df.index
    (r_con_in, r_con_out, r_ic, r_sp_u, r_sp_m, r_sp_d, r_pot_in,
     r_fit_ratio, r_leve_ratio, r_sharpe_out) = result

    figure_save_path = '/mnt/mfs/dat_whs/result/tmp'
    factor_tag = '|'.join([fun_name, name_1, name_2, name_3])
    # Built once and reused for both saving and the mail attachment.
    figure_file = os.path.join(figure_save_path, factor_tag) + '.png'

    fun_set = [mul_fun, sub_fun, add_fun]
    mix_fun_set = create_fun_set_2(fun_set)
    fun = mix_fun_set[fun_name]

    factor_set = load_part_factor(sector_name, begin_date, end_date, xnms,
                                  xinx, [name_1, name_2, name_3])
    choose_1 = factor_set[name_1]
    choose_2 = factor_set[name_2]
    choose_3 = factor_set[name_3]
    return_data = load_pct(begin_date, end_date, xnms, xinx)

    mix_factor = fun(choose_1, choose_2, choose_3)
    pos_df_daily = deal_mix_factor(mix_factor, sector_df, locked_df,
                                   hold_time, lag, if_only_long)
    (in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
     sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sharpe_q_out,
     pnl_df) = filter_all(
        cut_date, pos_df_daily, return_data, index_df,
        if_hedge=False, hedge_ratio=1, if_return_pnl=True,
        if_only_long=False)

    # Hedged PnL: position PnL minus the index frame scaled by the summed
    # daily position; only the first index column is kept.
    hedge_df = 1 * index_df.mul(pos_df_daily.sum(axis=1), axis=0)
    pnl_df_h = -hedge_df.sub((pos_df_daily * return_data).sum(axis=1), axis=0)
    pnl_df_h = pnl_df_h[pnl_df_h.columns[0]]

    fig = plt.figure(figsize=(16, 12))
    try:
        fig.suptitle('{} factor figure'.format(i), fontsize=40)
        ax1 = fig.add_subplot(2, 2, 1)
        ax2 = fig.add_subplot(2, 2, 2)
        ax3 = fig.add_subplot(2, 2, 3)
        ax4 = fig.add_subplot(2, 2, 4)

        asset = pnl_df.cumsum()
        asset_hedge = pnl_df_h.cumsum()
        pot = bt.AZ_Pot(pos_df_daily, asset.iloc[-1])
        sharpe = bt.AZ_Sharpe_y(pnl_df)
        pot_h = bt.AZ_Pot(pos_df_daily, asset_hedge.iloc[-1])
        sharpe_h = bt.AZ_Sharpe_y(pnl_df_h)

        # Flip both curves when the raw curve ends below zero so that the
        # plot always trends upwards.
        if asset.values[-1] < 0:
            asset = -asset
            asset_hedge = -asset_hedge

        cost_matrix_1 = (pos_df_daily.diff().abs() * cost_1) \
            .sum(axis=1).cumsum()

        ax1.plot(pnl_df.index, asset,
                 label='raw_line, pot={}, sharpe={}'.format(pot, sharpe))
        ax1.plot(pnl_df.index, asset - cost_matrix_1,
                 label='cost_line, pot={}, sharpe={}'.format(pot, sharpe))
        ax1.plot(pnl_df.index, asset_hedge,
                 label='helge_line, pot_h={}, sharpe_h={}'.format(
                     pot_h, sharpe_h))
        ax1.set_title('{}, {}, {}, {}'.format(fun_name, name_1, name_2,
                                              name_3))
        ax1.grid(1)
        ax1.legend()

        rolling_sharpe, cut_sharpe = AZ_Rolling_sharpe(pnl_df, roll_year=3,
                                                       year_len=250)
        ax2.hist(rolling_sharpe.values, bins=200)
        ax2.set_title('{},{},{},{},{},{},{},{}'.format(
            round(r_ic, 4), r_sp_u, r_sp_m, r_sp_d, r_pot_in,
            round(r_fit_ratio, 4), round(r_leve_ratio, 4), r_sharpe_out))
        ax2.grid(axis='y')

        result_filter = filter_pot_sharpe(
            cut_date, fun(choose_1, choose_2, choose_3), return_data,
            index_df, lag=1, hold_time=5, if_hedge=False, hedge_ratio=1)
        ax3.set_title(','.join([str(x) for x in result_filter]))
        ax4.set_title(
            'pot_in={}, leve_ratio={}, sharpe_q_in_df_m={}, fit_ratio={}'
            .format(pot_in, round(leve_ratio, 4), sharpe_q_in_df_m,
                    round(fit_ratio, 4)))

        ax3.plot(bt.AZ_Rolling(pos_df_daily.sum(axis=1), 250).mean())
        ax3.legend()

        # Save this figure explicitly instead of whatever pyplot considers
        # the "current" figure.
        fig.savefig(figure_file)
    finally:
        # Figures stay registered with pyplot until closed; without this
        # every call leaks one 16x12 figure.
        plt.close(fig)

    text = '.'
    to = ['*****@*****.**']
    subject = factor_tag
    filepath = [figure_file]
    send_email.send_email(text, to, filepath, subject)
def part_test_index_3(time_para_dict, sector_name, key, name_1, name_2,
                      name_3, sector_df, suspendday_df, limit_buy_sell_df,
                      return_choose, index_df, cut_date, log_save_file,
                      result_save_file, if_save, if_hedge, hold_time,
                      if_only_long, xnms, xinx, total_para_num):
    """Test every mixing function on one factor triple and log the results.

    The three factors are loaded once; each function produced by
    ``create_fun_set_2`` combines them. Mixes where fewer than 10% of the
    rows have a non-zero absolute sum are skipped. For the rest, daily
    positions (``lag = 2``) are evaluated with ``filter_time_para_fun`` and
    every time window whose first result element is truthy is printed and,
    when ``if_save`` is true, appended to ``result_save_file`` as a
    ``|``-separated line. Finally a timing line is appended to
    ``log_save_file`` (when ``if_save``) and a progress line is printed.

    Args:
        time_para_dict: Forwarded to ``filter_time_para_fun``.
        sector_name: Sector identifier, forwarded to ``load_part_factor``
            and written to the output files.
        key: Sequence number of this parameter combination.
        name_1, name_2, name_3: Factor names to load and combine.
        sector_df, suspendday_df, limit_buy_sell_df: Forwarded to
            ``deal_mix_factor``.
        return_choose, index_df: Forwarded to ``filter_time_para_fun``.
        cut_date: Not used inside this function.
        log_save_file: Path of the timing log (opened in append mode).
        result_save_file: Path of the result log (opened in append mode).
        if_save: When false nothing is written to either file.
        if_hedge, if_only_long: Forwarded to ``filter_time_para_fun``.
        hold_time: Holding period forwarded to ``deal_mix_factor``.
        xnms, xinx: Forwarded to ``load_part_factor``.
        total_para_num: Total number of combinations, for the progress
            percentage.
    """
    # NOTE(review): the lock is created per call, so it only guards writes
    # made inside this call; it cannot coordinate with other workers that
    # execute this function concurrently.
    lock = Lock()
    lag = 2
    start_time = time.time()

    # Load the factors (load_part_factor receives the universe xnms/xinx).
    load_time_1 = time.time()
    factor_set = load_part_factor(sector_name, xnms, xinx,
                                  [name_1, name_2, name_3])
    load_time_2 = time.time()
    # Seconds spent loading the data.
    load_delta = round(load_time_2 - load_time_1, 2)

    # Build the set of mixing functions.
    fun_set = [sub_fun, add_fun, mul_fun]
    fun_mix_2_set = create_fun_set_2(fun_set)

    # Swap the filter function here; only its name is recorded below.
    filter_fun = filter_all
    filter_name = filter_fun.__name__

    for fun in fun_mix_2_set:
        mix_factor = fun(factor_set[name_1], factor_set[name_2],
                         factor_set[name_3])
        # Skip mixes that are non-zero on fewer than 10% of the rows.
        active_rows = len(
            mix_factor.abs().sum(axis=1).replace(0, np.nan).dropna())
        if active_rows / len(mix_factor) < 0.1:
            continue

        daily_pos = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                    limit_buy_sell_df, hold_time, lag,
                                    if_only_long)
        # Filter results for each time window.
        result_dict = filter_time_para_fun(
            time_para_dict, daily_pos, return_choose, index_df,
            if_hedge=if_hedge, hedge_ratio=1, if_return_pnl=False,
            if_only_long=if_only_long)

        for time_key, time_result in result_dict.items():
            in_condition, *filter_result = time_result
            if not in_condition:
                continue
            # Store the result.
            if if_save:
                write_list = [
                    time_key, key, fun.__name__, name_1, name_2, name_3,
                    filter_name, sector_name, in_condition
                ] + filter_result
                # ``with open`` closes the handle; the previous bare
                # ``open`` leaked one file descriptor per written line.
                with lock, open(result_save_file, 'a') as f:
                    f.write('|'.join([str(x) for x in write_list]) + '\n')
            print([time_key, in_condition, fun.__name__, name_1, name_2,
                   name_3] + filter_result)

    end_time = time.time()
    # Store the parameters and timing of this run.
    if if_save:
        write_list = [
            key, name_1, name_2, name_3, filter_name, sector_name,
            round(end_time - start_time, 4), load_delta
        ]
        with lock, open(log_save_file, 'a') as f:
            f.write('|'.join([str(x) for x in write_list]) + '\n')

    print('{}%, {}, {}, {}, {}, cost {} seconds, load_cost {} seconds'.format(
        round(key / total_para_num * 100, 4), key, name_1, name_2, name_3,
        round(end_time - start_time, 2), load_delta))
def create_pnl_file_and_delete_factor(factor_root_path, sector_name_list):
    """Write a single-factor PnL CSV per factor and delete sparse factors.

    For every sector, each ``*.pkl`` factor under
    ``<factor_root_path>/<sector_name>`` is reindexed to the sector universe
    and converted to daily positions (``hold_time=5``, ``lag=2``,
    long/short). Its daily PnL is the row sum of ``position * return``.
    If fewer than 30% of the PnL rows are non-zero, the factor pickle is
    **removed from disk**; otherwise the PnL is written to
    ``<root_path>/data/single_factor_pnl/<sector_name>/<factor>.csv``.

    Relies on the module-level name ``root_path``.

    Args:
        factor_root_path: Directory containing one sub-directory per sector.
        sector_name_list: Iterable of sector names to process.
    """
    begin_date = pd.to_datetime('20100101')
    # NOTE(review): cut_date is not used in the visible body -- kept in case
    # later code depends on it.
    cut_date = pd.to_datetime('20160401')
    end_date = pd.to_datetime('20180401')
    index_name = '000016'

    # The raw return table is the same file for every sector, so it is read
    # and parsed once (lazily, so an empty sector list still reads nothing).
    raw_return = None

    for sector_name in sector_name_list:
        # Sector universe.
        sector_df = load_sector_data(begin_date, end_date, sector_name)
        xnms = sector_df.columns
        xinx = sector_df.index

        # Suspension / limit-up-down data.
        suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)

        # Returns, aligned to the sector universe.
        if raw_return is None:
            raw_return = pd.read_table(
                '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                sep='|', index_col=0).astype(float)
            raw_return.index = pd.to_datetime(raw_return.index)
        return_choose = raw_return.reindex(columns=xnms, index=xinx,
                                           fill_value=0)

        # Index data.
        # NOTE(review): index_df is not used in the visible body.
        index_df = load_index_data(xinx, index_name)

        factor_path = factor_root_path + '/' + sector_name
        # Match the extension exactly: the name is later stripped with
        # ``[:-4]``, which assumes a trailing '.pkl'. The old substring test
        # ('pkl' in x) also picked up files such as 'x.pkl.bak'.
        factor_name_list = [x for x in os.listdir(factor_path)
                            if x.endswith('.pkl')]
        save_pnl_path = os.path.join(
            root_path, 'data/single_factor_pnl/' + sector_name)

        for factor_name in factor_name_list:
            factor_load_path = os.path.join(factor_path, factor_name)
            print(factor_load_path)
            factor_stem = factor_name[:-4]

            # NOTE: read_pickle executes arbitrary code from the file; only
            # use on factor files produced by trusted jobs.
            factor_df = pd.read_pickle(factor_load_path)
            factor_df = factor_df.reindex(columns=xnms, index=xinx,
                                          fill_value=0)
            daily_pos = deal_mix_factor(factor_df, sector_df, suspendday_df,
                                        limit_buy_sell_df, hold_time=5,
                                        lag=2, if_only_long=False)
            pnl_df = (daily_pos * return_choose).sum(axis=1)
            bt.AZ_Path_create(save_pnl_path)
            pnl_df = pd.DataFrame(pnl_df, columns=[factor_stem])

            # Share of days with a non-zero PnL.
            active_days = len(pnl_df.replace(0, np.nan).dropna())
            if active_days / len(pnl_df) < 0.3:
                print(factor_name + ' is delete')
                os.remove(factor_load_path)
            else:
                pnl_df.to_csv(
                    os.path.join(save_pnl_path, factor_stem + '.csv'))
                print('pnl create!')