def main_fun(begin_date, cut_date, end_date, time_para_dict, sector_name, index_name, hold_time,
             return_file, new_factor_list, add_factor_list, if_hedge=False, if_only_long=False):
    if_save = True
    if_new_program = True
    use_factor_set_path = '/mnt/mfs/dat_whs/data/use_factor_set/market_top_2000_201808201941.pkl'
    return_file = 'aadj_r'
    para_ready_df, log_save_file, result_save_file = \
        save_load_control(use_factor_set_path, sector_name, new_factor_list, add_factor_list,
                          if_save, if_new_program, if_hedge, hold_time, return_file, if_only_long)
    total_para_num = len(para_ready_df)
    # sector
    sector_df = load_sector_data(begin_date, end_date, sector_name)
    xnms = sector_df.columns
    xinx = sector_df.index
    # suspend or limit up_dn
    # suspendday_df, limit_buy_df, limit_sell_df = load_locked_data(xnms, xinx)
    suspendday_df, limit_buy_sell_df = load_locked_data(xnms, xinx)
    # return
    return_choose = pd.read_table('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                                  sep='|', index_col=0).astype(float)
    # return_choose = pd.read_table('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r_vwap.csv',
    #                               sep='|', index_col=0).astype(float)
    return_choose.index = pd.to_datetime(return_choose.index)
    return_choose = return_choose.reindex(columns=xnms, index=xinx, fill_value=0)
    # index data
    index_df = load_index_data(xinx, index_name)
    # index_df = pd.Series(index_df)
    test_index_3(time_para_dict, sector_name, sector_df, suspendday_df, limit_buy_sell_df,
                 return_choose, index_df, para_ready_df, cut_date, log_save_file, result_save_file,
                 if_save, if_hedge, hold_time, if_only_long, xnms, xinx, total_para_num)
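# Hypothetical usage sketch (not part of the original source): one plausible way to drive
# main_fun. The dates, index, hold_time, factor lists and the layout of time_para_dict below
# are illustrative assumptions, not values taken from the production configuration.
def example_run_main_fun():
    # assumed structure: name -> [begin, cut, end] test windows consumed by test_index_3
    time_para_dict = {
        'time_para_1': [pd.to_datetime('20100101'), pd.to_datetime('20160401'),
                        pd.to_datetime('20180401')],
    }
    main_fun(begin_date=pd.to_datetime('20100101'),
             cut_date=pd.to_datetime('20160401'),
             end_date=pd.to_datetime('20180401'),
             time_para_dict=time_para_dict,
             sector_name='market_top_2000',   # assumed sector, matching use_factor_set_path
             index_name='000905',             # assumed hedge index
             hold_time=5,
             return_file='aadj_r',
             new_factor_list=[],              # assumed: no new/added factors
             add_factor_list=[],
             if_hedge=True,
             if_only_long=False)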
xinx = sector_df.index
# suspend or limit up_dn
suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)
# return
return_choose = pd.read_table('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                              sep='|', index_col=0).astype(float)
return_choose.index = pd.to_datetime(return_choose.index)
return_choose = return_choose.reindex(columns=xnms, index=xinx, fill_value=0)
# index data
index_df = load_index_data(xinx, index_name)

all_use_factor = pd.read_table(log_result, sep='|', header=None)
all_use_factor.columns = ['key', 'fun_name', 'name1', 'name2', 'name3', 'filter_fun_name',
                          'sector_name', 'con_in', 'con_out', 'ic', 'sp_u', 'sp_m', 'sp_d',
                          'pot_in', 'fit_ratio', 'leve_ratio', 'sp_out']
# generate the pnl files
# create_pnl_file(all_use_factor, file_name, hold_time)
# combine the generated pnl files
pnl_df, sum_pos_df = pos_sum(all_use_factor, hold_time)
plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'sum_pos_plot')
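# Illustrative stand-in (not the actual bt library code): an annualised Sharpe ratio in the
# spirit of bt.AZ_Sharpe_y, assuming pnl_df holds daily pnl and roughly 250 trading days a year.
def example_sharpe_y(pnl_series):
    """Annualised Sharpe ratio of a daily pnl series (illustrative sketch only)."""
    daily = pnl_series.dropna()
    if len(daily) < 2 or daily.std() == 0:
        return 0.0
    return (250 ** 0.5) * daily.mean() / daily.std()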
# cumulative return curve per stock, with short/long rolling means as trend filters
stk_CR = return_df.replace(np.nan, 0).cumsum()
ma5 = bt.AZ_Rolling_mean(stk_CR, 3)
ma50 = bt.AZ_Rolling_mean(stk_CR, 12)
# annualised 30-day volatility, floored at 0.08
vol30 = bt.AZ_Rolling(return_df, 30).std() * (250 ** 0.5)
vol30[vol30 < 0.08] = 0.08
# up/down trend conditions
cond_d = (ma5 < ma50).astype(int)
cond_u = (ma5 > ma50).astype(int)
hold_time_list = [1, 2, 5]

xnms = return_df.columns
xinx = return_df.index
index_df = load_index_data(begin_date, end_date, xinx, '000300').shift(1)
locked_df = load_locked_data(begin_date, end_date, xnms, xinx)

use_company_list = get_use_company(data2)
aa = random.sample(use_company_list, 200)
all_tmp_df = pd.DataFrame()
for company in use_company_list:
    print(company)
    # match the company's records in data2 with the security/date info in data1
    part_data2 = data2[data2['COMPANYNAME'] == company][['RID', 'TRSDIR']]
    part_data1 = data1.loc[part_data2['RID'].values][['SECURITYCODE', 'TRADEDATE']]
    part_data1['TRSDIR'] = part_data2['TRSDIR'].values
    # keep the last TRSDIR record per (date, security) and pivot to a date x security matrix
    tmp_df = part_data1.groupby(['TRADEDATE', 'SECURITYCODE'])['TRSDIR'] \
        .apply(lambda x: x.iloc[-1]).unstack()
    tmp_df = (tmp_df == '0').astype(int)
    tmp_df = tmp_df.reindex(columns=xnms, index=xinx, fill_value=0)
def create_pnl_file_and_delete_factor(factor_root_path, sector_name_list):
    begin_date = pd.to_datetime('20100101')
    cut_date = pd.to_datetime('20160401')
    end_date = pd.to_datetime('20180401')
    # sector_name = 'market_top_100'
    index_name = '000016'
    for sector_name in sector_name_list:
        # sector
        sector_df = load_sector_data(begin_date, end_date, sector_name)
        xnms = sector_df.columns
        xinx = sector_df.index
        # suspend or limit up_dn
        # suspendday_df, limit_buy_df, limit_sell_df = load_locked_data(xnms, xinx)
        suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)
        # return
        return_choose = pd.read_table('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                                      sep='|', index_col=0).astype(float)
        return_choose.index = pd.to_datetime(return_choose.index)
        return_choose = return_choose.reindex(columns=xnms, index=xinx, fill_value=0)
        # index data
        index_df = load_index_data(xinx, index_name)

        factor_path = factor_root_path + '/' + sector_name
        factor_name_list = [x for x in os.listdir(factor_path) if 'pkl' in x]
        save_pnl_path = os.path.join(root_path, 'data/single_factor_pnl/' + sector_name)
        # bt.AZ_Delete_file(save_pnl_path)
        for factor_name in factor_name_list:
            factor_load_path = os.path.join(factor_path, factor_name)
            print(factor_load_path)
            # if not os.path.exists(os.path.join(save_pnl_path, factor_name[:-4] + '.csv')):
            factor_df = pd.read_pickle(factor_load_path)
            factor_df = factor_df.reindex(columns=xnms, index=xinx, fill_value=0)
            daily_pos = deal_mix_factor(factor_df, sector_df, suspendday_df, limit_buy_sell_df,
                                        hold_time=5, lag=2, if_only_long=False)
            pnl_df = (daily_pos * return_choose).sum(axis=1)
            bt.AZ_Path_create(save_pnl_path)
            pnl_df = pd.DataFrame(pnl_df, columns=[factor_name[:-4]])
            # pnl_df.to_csv(os.path.join(save_pnl_path, factor_name[:-4] + '.csv'))
            # delete factors whose pnl is non-zero on fewer than 30% of days
            if len(pnl_df.replace(0, np.nan).dropna()) / len(pnl_df) < 0.3:
                print(factor_name + ' is deleted')
                os.remove(factor_load_path)
            else:
                pnl_df.to_csv(os.path.join(save_pnl_path, factor_name[:-4] + '.csv'))
        print('pnl created!')
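# Hypothetical usage (not part of the original source): the factor root path and sector names
# below are illustrative assumptions about where the per-sector factor pickles live.
def example_create_pnl_files():
    factor_root_path = '/mnt/mfs/dat_whs/data/new_factor_data'    # assumed factor location
    sector_name_list = ['market_top_300', 'market_top_500']       # assumed sectors
    create_pnl_file_and_delete_factor(factor_root_path, sector_name_list)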
# for sector_name in ['market_top_100', 'market_top_500', 'market_top_1000']:
sector_name = 'market_top_300'
index_name = '000300'
# sector
sector_df = load_sector_data(begin_date, end_date, sector_name)
sector_mean = sector_df.sum(axis=1).mean()
sector_set = sector_df.columns
# suspend or limit up_dn
locked_df = load_locked_data(begin_date, end_date, sector_set)
# return
# index data
# index_df = load_index_data(begin_date, end_date, index_name)
index_df = load_index_data(begin_date, end_date, index_name)
for file_name in sorted(os.listdir('/mnt/mfs/dat_whs/data/intra_factor_data')):
    vwap_file_path = os.path.join(
        '/mnt/mfs/dat_whs/data/base_data/intra_vwap_tab_{}.pkl'.format(file_name.split('_')[3]))
    vwap_df = pd.read_pickle(vwap_file_path)
    vwap_df.columns = [x[2:] + '.' + x[:2] for x in vwap_df.columns]
    factor_df = pd.read_pickle('/mnt/mfs/dat_whs/data/intra_factor_data/' + file_name)
    # return_choose = load_pct(begin_date, end_date, sector_set)
    xnms = sorted(list(set(vwap_df.columns) & set(factor_df.columns)