def plot_send_data(raw_df, subject, text=''):
    """Plot ``raw_df``, save the figure as a PNG and mail it out.

    The figure is produced with ``raw_df.plot(legend=True)`` and written to
    ``{figure_save_path}/{subject}.png``. That path is then handed to
    ``send_email.send_email`` together with ``text`` and ``subject``.

    :param raw_df: object exposing ``.plot(legend=True)`` (presumably a
        pandas DataFrame/Series -- confirm against callers).
    :param subject: used both as the PNG file stem and as the last argument
        of ``send_email.send_email``.
    :param text: first argument of ``send_email.send_email``.
    """
    raw_df.plot(legend=True)
    image_path = f'{figure_save_path}/{subject}.png'
    plt.savefig(image_path)
    plt.close()

    recipients = ['*****@*****.**']
    send_email.send_email(text, recipients, [image_path], subject)
def savfig_send(subject='tmp', text='', to=None, filepath=None):
    """Save the figure via ``plt.savefig`` and mail it out, then close it.

    The image is always written to ``{figure_save_path}/{subject}.png``.

    :param subject: PNG file stem and last argument of
        ``send_email.send_email``.
    :param text: first argument of ``send_email.send_email``.
    :param to: recipient list; falls back to the built-in default address
        list when ``None``.
    :param filepath: list of paths passed to ``send_email.send_email``;
        falls back to the freshly saved PNG when ``None``.
    """
    target_save_path = f'{figure_save_path}/{subject}.png'
    recipients = ['*****@*****.**'] if to is None else to
    attachments = [target_save_path] if filepath is None else filepath

    plt.savefig(target_save_path)
    send_email.send_email(text, recipients, attachments, subject)
    # The figure is closed only after the mail has been sent.
    plt.close()
def plot_send_result(pnl_df, sharpe_ratio, subject, text=''):
    """Plot the cumulative sum of ``pnl_df`` and mail the resulting PNG.

    A 16x8 figure is drawn with ``pnl_df.cumsum()`` against ``pnl_df.index``;
    the legend entry shows ``sharpe_ratio``. The figure is saved to
    ``<figure_save_path>/<subject>.png`` and that file is passed to
    ``send_email.send_email``.

    :param pnl_df: object exposing ``.index`` and ``.cumsum()``.
    :param sharpe_ratio: value rendered into the legend label.
    :param subject: PNG file stem and last argument of
        ``send_email.send_email``.
    :param text: first argument of ``send_email.send_email``.
    """
    plt.figure(figsize=[16, 8])
    curve_label = f'sharpe_ratio={sharpe_ratio}'
    plt.plot(pnl_df.index, pnl_df.cumsum(), label=curve_label)
    plt.grid()
    plt.legend()

    image_path = os.path.join(figure_save_path, f'{subject}.png')
    plt.savefig(image_path)
    plt.close()

    send_email.send_email(text, ['*****@*****.**'], [image_path], subject)
def plot_send_result_mul(pnl_df, subject, text=''):
    """Plot every column of ``pnl_df`` in its own subplot and mail the PNG.

    One subplot per column is stacked vertically (figure height grows by 8
    per column). Each subplot shows the column's cumulative sum, labelled
    with the column name and the value of ``bt.AZ_Sharpe_y`` for that
    column. The figure is saved to ``<figure_save_path>/<subject>.png`` and
    that file is passed to ``send_email.send_email``.

    :param pnl_df: pandas DataFrame (subclasses are accepted), one column
        per PnL series.
    :param subject: PNG file stem and last argument of
        ``send_email.send_email``.
    :param text: first argument of ``send_email.send_email``.
    :raises AssertionError: if ``pnl_df`` is not a DataFrame.
    """
    # isinstance instead of an exact type comparison so that DataFrame
    # subclasses are not rejected.
    assert isinstance(pnl_df, pd.DataFrame), 'pnl_df must be a pandas DataFrame'

    pnl_num = len(pnl_df.columns)
    plt.figure(figsize=[16, 8 * pnl_num])
    for i, col in enumerate(pnl_df.columns):
        ax = plt.subplot(pnl_num, 1, i + 1)
        pnl = pnl_df[col]
        ax.plot(pnl.index, pnl.cumsum(),
                label=f'{col}, sharpe_ratio={bt.AZ_Sharpe_y(pnl)}')
        # Inside the loop on purpose: these pyplot calls act on the subplot
        # that was just created, so every panel gets its grid and legend.
        plt.grid()
        plt.legend()

    image_path = os.path.join(figure_save_path, '{}.png'.format(subject))
    plt.savefig(image_path)
    plt.close()

    to = ['*****@*****.**']
    send_email.send_email(text, to, [image_path], subject)
def corr_test_fun(pnl_df, alpha_name):
    """Check how correlated ``pnl_df`` is with existing PnLs and mail a verdict.

    ``pnl_df`` is concatenated column-wise with the frame returned by
    ``get_corr_matrix(cut_date=None)`` and the ``alpha_name`` column of the
    resulting correlation matrix is taken. The check fails when at least
    two rows of that column exceed 0.7, or when ``get_all_pnl_corr`` returns
    at least two entries. The verdict plus the HTML-rendered correlation
    column is e-mailed with subject ``'[RESULT DEAL]' + alpha_name``.

    :param pnl_df: PnL series/frame; it must contribute a column named
        ``alpha_name`` to the concatenated frame.
    :param alpha_name: column label to test.
    """
    reference_pnl = get_corr_matrix(cut_date=None)
    combined_pnl = pd.concat([reference_pnl, pnl_df], axis=1)
    corr_self = combined_pnl.corr()[[alpha_name]]

    other_corr = get_all_pnl_corr(pnl_df, alpha_name)
    print(other_corr)

    # Rows whose correlation with ``alpha_name`` is above the 0.7 threshold.
    # NOTE(review): the ">= 2" cut-off presumably allows for the column's
    # correlation with itself -- confirm.
    self_corr = corr_self[corr_self > 0.7].dropna(axis=0)
    print(self_corr)

    too_correlated = len(self_corr) >= 2 or len(other_corr) >= 2
    verdict = 'FAIL!' if too_correlated else 'SUCCESS!'
    print(verdict)
    send_email.send_email(verdict + '\n' + corr_self.to_html(),
                          ['*****@*****.**'],
                          [],
                          '[RESULT DEAL]' + alpha_name)
    print('______________________________________')
def config_test(main_model, config_name, result_file_name, cut_date):
    """Re-run every factor of a saved config, combine them and report.

    Steps, in order:

    1. Read ``/mnt/mfs/dat_whs/alpha_data/{config_name}.pkl`` and take its
       ``'factor_info'`` table.
    2. For each row call ``main_model.single_test`` and add the returned
       mixed factor to a running sum -- with its sign flipped when the
       row's ``buy_sell`` value is not positive.
    3. Turn the summed factor into positions with
       ``main_model.deal_mix_factor(...).shift(2)`` and evaluate them with
       ``filter_all``.
    4. Write the PnL to ``/mnt/mfs/dat_whs/tmp_pnl_file/{result_file_name}.csv``,
       e-mail the evaluation metrics and send the PnL plot.

    :param main_model: object providing ``single_test``, ``deal_mix_factor``
        and ``return_choose``.
    :param config_name: stem of the config pickle to load.
    :param result_file_name: stem of the CSV output and suffix of the mail
        subject.
    :param cut_date: forwarded as the first argument of ``filter_all``.
    :return: ``(sum_pos_df, pnl_df, sp)`` where ``sp`` is
        ``bt.AZ_Sharpe_y(pnl_df)``.
    """
    config_set = pd.read_pickle(
        f'/mnt/mfs/dat_whs/alpha_data/{config_name}.pkl')
    config_data = config_set['factor_info']

    sum_factor_df = pd.DataFrame()
    for i in config_data.index:
        fun_name, name1, name2, name3, buy_sell = config_data.loc[i]
        print('***************************************************')
        print(f"now {i}'s is running, key={fun_name}, {name1}, {name2}, {name3}")

        # Only ``mix_factor`` is used below; the remaining values are still
        # unpacked by name so the expected 13-value result shape is enforced.
        (mix_factor, con_in_c, con_out_c, ic_c, sp_u_c, sp_m_c, sp_d_c,
         pot_in_c, fit_ratio_c, leve_ratio_c, sp_in_c, sp_out_c,
         pnl_df_c) = main_model.single_test(fun_name, name1, name2, name3)

        signed_factor = mix_factor if buy_sell > 0 else -mix_factor
        sum_factor_df = sum_factor_df.add(signed_factor, fill_value=0)

    sum_pos_df = main_model.deal_mix_factor(sum_factor_df).shift(2)

    (in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
     sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sp_in, sharpe_q_out,
     pnl_df) = filter_all(cut_date, sum_pos_df, main_model.return_choose,
                          if_return_pnl=True, if_only_long=False)

    # The same eleven metrics are printed and mailed.
    send_list = [
        in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
        sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sp_in, sharpe_q_out,
    ]
    print(*send_list)

    sp = bt.AZ_Sharpe_y(pnl_df)
    pnl_df.to_csv(f'/mnt/mfs/dat_whs/tmp_pnl_file/{result_file_name}.csv')

    mail_subject = '[RESULT DEAL]' + result_file_name
    send_email.send_email(','.join(map(str, send_list)),
                          ['*****@*****.**'],
                          [],
                          mail_subject)
    plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), mail_subject)
    return sum_pos_df, pnl_df, sp
def main(result_file_name, time_para_dict):
    """Run the full post-processing pipeline for one result file.

    Pipeline, in order:

    1. Parse ``result_file_name``: the holding period is the token after
       ``'hold'``, the script number is the last ``'_'``-separated token
       (or the one before a trailing ``'long'``, which also switches on
       long-only mode).
    2. Import the matching ``main_file_sector_<script_num>`` module.
    3. Read ``/mnt/mfs/dat_whs/result/result/<result_file_name>.txt``
       (``'|'``-separated) and drop rows that use an excluded factor name.
    4. Run ``survive_ratio_test``; stop if it returns ``None``.
    5. Build the model, call ``pos_sum_c``, ``config_create`` and
       ``config_test``; stop if the resulting Sharpe value is below 2.
    6. Correlate the new PnL against ``get_corr_matrix(cut_date=None)`` and
       e-mail a FAIL/SUCCESS verdict (FAIL when at least two rows exceed
       0.7).

    :param result_file_name: stem of the result file; also used as config
        name, PnL name and mail subject.
    :param time_para_dict: accepted for interface compatibility only -- it
        is replaced by ``time_para_dict`` from the imported factor script.
    :return: always ``0``.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import importlib

    print(
        '*******************************************************************************************************'
    )
    root_path = '/mnt/mfs/DAT_EQT'
    config_name = result_file_name
    if_save = False
    if_new_program = True
    hold_time = int(result_file_name.split('hold')[-1].split('_')[1])

    # Pick the factor script that matches the file name.
    name_parts = result_file_name.split('_')
    if_only_long = name_parts[-1] == 'long'
    script_num = name_parts[-2] if if_only_long else name_parts[-1]
    print('script_num : ', script_num)
    print('hold_time : ', hold_time)

    # Plain import instead of exec() + locals(): reading exec()'s result
    # back through an earlier locals() dict stops working on Python 3.13+
    # (locals() returns independent snapshots), and exec() would run
    # arbitrary code embedded in the file name.
    mf = importlib.import_module(
        'work_whs.AZ_2018_Q2.factor_script.main_file.'
        f'main_file_sector_{script_num}')
    # The caller-supplied dictionary is overridden by the script's own one.
    time_para_dict = mf.time_para_dict

    lag = 2
    return_file = ''
    sector_name, if_hedge = find_sector_name(result_file_name)
    # The hedge flag returned above is deliberately overridden.
    if_hedge = True
    print(result_file_name)
    print(sector_name)
    result_path = '/mnt/mfs/dat_whs/result/result/{}.txt'.format(
        result_file_name)

    # Nothing to analyse when the result file is empty.
    if not os.path.getsize(result_path):
        return 0

    # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and
    # removed in 2.0 (successor: ``on_bad_lines``). Kept as-is to preserve
    # behaviour on the pandas version this project targets -- confirm.
    data = pd.read_csv(result_path, sep='|', header=None,
                       error_bad_lines=False)
    data.columns = [
        'time_para', 'key', 'fun_name', 'name1', 'name2', 'name3',
        'filter_fun_name', 'sector_name', 'con_in', 'con_out_1', 'con_out_2',
        'con_out_3', 'con_out_4', 'ic', 'sp_u', 'sp_m', 'sp_d', 'pot_in',
        'fit_ratio', 'leve_ratio', 'sp_in', 'sp_out_1', 'sp_out_2',
        'sp_out_3', 'sp_out_4'
    ]

    # Drop every row in which any of the three factor names is excluded.
    # Previously listed but disabled candidates: 'CMO_40_0', 'ATR_40_0.2',
    # 'ADX_200_40_20', 'ATR_140_0.2'.
    excluded_names = [
        'R_COMPANYCODE_First_row_extre_0.3',
        'return_p20d_0.2',
        'price_p120d_hl',
        'return_p60d_0.2',
        'wgt_return_p120d_0.2',
        'wgt_return_p20d_0.2',
        'log_price_0.2',
        'TVOL_row_extre_0.2',
        'tab2_11_row_extre_0.3',
        'tab1_8_row_extre_0.3',
        'intra_dn_vol_col_score_row_extre_0.3',
        'intra_dn_vol_row_extre_0.3',
        'turn_p30d_0.24',
        'evol_p30d',
    ]
    # Vectorised membership test: same rows as the former per-row lambda,
    # without rebuilding a set per comparison, and well-defined on an
    # empty frame.
    uses_excluded = data[['name1', 'name2', 'name3']] \
        .isin(excluded_names).any(axis=1)
    data = data[~uses_excluded]

    para_adj_set_list = [
        {'pot_in_num': 50, 'leve_ratio_num': 2, 'sp_in': 1.5, 'ic_num': 0.0, 'fit_ratio': 2},
        {'pot_in_num': 40, 'leve_ratio_num': 2, 'sp_in': 1.5, 'ic_num': 0.0, 'fit_ratio': 2},
        {'pot_in_num': 50, 'leve_ratio_num': 2, 'sp_in': 1, 'ic_num': 0.0, 'fit_ratio': 1},
        {'pot_in_num': 50, 'leve_ratio_num': 1, 'sp_in': 1, 'ic_num': 0.0, 'fit_ratio': 2},
        {'pot_in_num': 50, 'leve_ratio_num': 1, 'sp_in': 1, 'ic_num': 0.0, 'fit_ratio': 1},
        {'pot_in_num': 40, 'leve_ratio_num': 1, 'sp_in': 1, 'ic_num': 0.0, 'fit_ratio': 1},
    ]
    time_para = 'time_para_5'
    print(time_para)

    # ------------------------------------------------------------------
    # Result analysis
    print('结果分析')
    survive_result = survive_ratio_test(data, para_adj_set_list)
    if survive_result is None:
        print(f'{result_file_name} not satisfaction!!!!!!!!')
        return 0
    print(hold_time)

    # ------------------------------------------------------------------
    # Backtest
    if sector_name.startswith('market_top_300plus'):
        if_weight, ic_weight = 1, 0
    elif sector_name.startswith('market_top_300to800plus'):
        if_weight, ic_weight = 0, 1
    else:
        if_weight, ic_weight = 0.5, 0.5
    print('回测函数')

    # The entry holds six values; only the first two and the last are used.
    begin_date, cut_date, _, _, _, end_date = time_para_dict[time_para]
    main_model = mf.FactorTestSector(root_path, if_save, if_new_program,
                                     begin_date, cut_date, end_date,
                                     time_para_dict, sector_name, hold_time,
                                     lag, return_file, if_hedge,
                                     if_only_long, if_weight, ic_weight)
    # Results are superseded by config_test() below; the call is kept as in
    # the original flow.
    sum_pos_df, pnl_df = pos_sum_c(main_model, data, time_para,
                                   result_file_name, **survive_result)

    # ------------------------------------------------------------------
    # Create the config file
    config_create(main_model, sector_name, result_file_name, config_name,
                  data, time_para, **survive_result, n=5, use_factor_num=40)

    # ------------------------------------------------------------------
    # Test the config result
    sum_pos_df, pnl_df, sp = config_test(main_model, config_name,
                                         result_file_name, cut_date)
    if sp < 2:
        return 0
    pnl_df.name = result_file_name

    # ------------------------------------------------------------------
    # Correlation check against the reference PnL matrix (the same source
    # corr_test_fun uses).
    sum_pnl_df = get_corr_matrix(cut_date=None)
    sum_pnl_df_c = pd.concat([sum_pnl_df, pnl_df], axis=1)
    corr_self = sum_pnl_df_c.corr()[[result_file_name]]
    print(corr_self)
    print('______________________________________')
    high_corr = corr_self[corr_self > 0.7].dropna(axis=0)
    print(high_corr)

    verdict = 'FAIL!' if len(high_corr) >= 2 else 'SUCCESS!'
    print(verdict)
    send_email.send_email(verdict + '\n' + pd.DataFrame(corr_self).to_html(),
                          ['*****@*****.**'],
                          [],
                          result_file_name)
    print('______________________________________')
    return 0