def cal_factor_exposure(self, beg_date, end_date):
    """Compute the GEM (Growth Enterprise Market) dummy factor exposure.

    For every trade date in [beg_date, end_date], keep stocks that listed
    at least ``new_stock_days`` trade days before the date and are not yet
    delisted, flag each as GEM / non-GEM via ``CodeFormat().get_gem_stock``,
    and save the stock x date exposure matrix under ``self.raw_factor_name``.

    :param beg_date: first date of the window (yyyymmdd string)
    :param end_date: last date of the window (yyyymmdd string)
    """
    new_stock_days = 60  # trade-day seasoning period for excluding fresh IPOs

    ipo_info = Stock().get_ipo_date()
    ipo_info.columns = ['ipo', 'delist']
    # cast to str so yyyymmdd dates compare correctly as lexicographic strings
    ipo_info = ipo_info.astype(str)

    date_series = Date().get_trade_date_series(beg_date, end_date)

    # Collect one single-column frame per date and concat ONCE at the end:
    # the original re-concatenated the accumulator every iteration (O(n^2)).
    frames = []
    for date in date_series:
        print('Calculating Barra Risk factor %s at date %s' % (self.raw_factor_name, date))
        new_stock_date = Date().get_trade_date_offset(date, -new_stock_days)
        # stocks listed before the seasoning cutoff and not yet delisted;
        # .copy() avoids pandas SettingWithCopyWarning on the assignment below
        data_date = ipo_info[(ipo_info['ipo'] < new_stock_date) &
                             (ipo_info['delist'] > date)].copy()
        data_date['GEM'] = data_date.index.map(CodeFormat().get_gem_stock)
        res_date = pd.DataFrame(data_date['GEM'])
        res_date.columns = [date]
        frames.append(res_date)

    # guard: pd.concat([]) raises, so handle an empty trade-date range
    res = pd.concat(frames, axis=1) if frames else pd.DataFrame()
    self.save_risk_factor_exposure(res, self.raw_factor_name)
def cal_ipo_sell():
    """Generate and FTP-upload sell-order files for newly-listed stocks.

    Finds fund holdings in stocks that listed within the last ``new_days``
    calendar days, keeps those that traded today (Vol > 0) and are off the
    limit-up (pct change < 9%, i.e. the new-issue run has "opened"), then
    writes one .xls order file per fund and uploads it via FTP.

    Side effects: creates a dated local output folder, writes .xls files,
    and uploads each file over FTP. No return value.
    """
    out_path = 'E:\\3_Data\\7_other_data\\4_cal_ipo_sell\\'
    new_days = 60  # calendar-day window defining a "new" stock
    today = datetime.today().strftime("%Y%m%d")
    Date().load_trade_date_series("D")
    # MfcData().load_ftp_daily(date=today)
    # MfcData().change_ftp_file(date=today)
    before_trade_date = Date().get_trade_date_offset(today, -1)

    # ---- holdings as of the previous trade date ----
    data = MfcData().get_fund_security(before_trade_date)
    data = data.dropna(subset=['基金名称'])
    data = data[['基金名称', '证券代码', '持仓', '证券类别']]
    data.columns = ['FundName', 'StockCode', 'Holding', 'Type']
    data = data[data.Type == '股票']
    data.StockCode = data.StockCode.map(stock_code_add_postfix)
    data["Market"] = data.StockCode.map(get_stcok_market)

    # ---- IPO dates ----
    Stock().load_all_stock_code_now()
    Stock().load_ipo_date()
    stock = Stock().get_ipo_date()
    stock.columns = ['IpoDate', 'DelistDate']
    stock['StockCode'] = stock.index
    stock['IpoDate'] = stock['IpoDate'].map(lambda x: str(int(x)))

    # ---- keep only stocks listed within the last new_days calendar days ----
    new_stock_date = (datetime.today() - timedelta(days=new_days)).strftime("%Y%m%d")
    all_data = pd.merge(data, stock, on=['StockCode'], how="left")
    all_data = all_data[all_data.IpoDate > new_stock_date]

    # ---- realtime pct-change and volume via Wind wsq ----
    code_list = list(set(all_data.StockCode.values))
    code_str = ','.join(code_list)
    pct = w.wsq(code_str, "rt_pct_chg,rt_vol")
    pct = pd.DataFrame(pct.Data, columns=pct.Codes, index=['Pct', 'Vol']).T
    pct['StockCode'] = pct.index

    # trading (Vol > 0) and no longer limit-up (< 9%) -> sellable
    new_data = pd.merge(all_data, pct, on=['StockCode'], how="left")
    new_data = new_data[new_data['Vol'] > 0]
    new_data = new_data[new_data['Pct'] < 0.09]

    # dated output folder is loop-invariant: create it once, race-free
    out_sub_path = os.path.join(out_path, today)
    if not os.path.exists(out_sub_path):
        os.makedirs(out_sub_path, exist_ok=True)
        print(" Make Folder At ", today)

    for fund_name in set(data['FundName']):
        fund_data = new_data[new_data.FundName == fund_name]
        if len(fund_data) > 0:
            warnings.filterwarnings("ignore")
            # .copy() so the column edits below do not hit a view of new_data
            fund_data_out = fund_data[['StockCode', 'Holding', 'Market']].copy()
            fund_data_out.columns = ['Ticker', 'Shares', 'Market']
            fund_data_out['Direction'] = 2  # 2 = sell; confirm against order spec
            fund_data_out['Price'] = 0
            fund_data_out['Market Code'] = fund_data_out['Market'].map(
                lambda x: 1 if x == 'SH' else 2)
            fund_data_out['Price Model'] = 4
            fund_data_out['Ticker'] = fund_data_out['Ticker'].map(lambda x: x[0:6])
            fund_data_out = fund_data_out[[
                'Ticker', 'Direction', 'Shares', 'Price', 'Price Model', 'Market Code'
            ]]
            # 'file' shadowed the builtin in the original; renamed
            file_name = fund_name + '.xls'
            out_file = os.path.join(out_sub_path, file_name)
            print(out_file)
            fund_data_out.to_excel(out_file, index=None)
            ftp = MyFtp()
            ftp.connect()
            ftp_file = os.path.join("\\ipo_stock\\", today, file_name)
            ftp.upload_file(ftp_file, out_file)
            ftp.close()
def ipo_sell(self, today=None):
    """Generate "IPO opened" sell instruction files, one per fund manager.

    For each manager, find group holdings in stocks listed within the last
    ``self.new_days`` calendar days that traded today (Vol > 0) and are no
    longer limit-up (pct change < 9%), write them to an .xls instruction
    file and upload it to the FTP server.

    :param today: run date as a yyyymmdd string; defaults to the current
        date *at call time*. (The original default was evaluated once at
        import time, freezing the date for long-running processes.)
    """
    if today is None:
        today = datetime.today().strftime("%Y%m%d")
    before_trade_date = Date().get_trade_date_offset(today, -1)

    # ---- holdings as of the previous trade date ----
    data = MfcData().get_group_security(before_trade_date)
    data = data.dropna(subset=['基金名称'])
    data = data[['基金名称', '基金编号', '组合名称', '组合编号', '证券代码', '持仓', '证券类别']]
    # BUG FIX: the original assigned a *nested* list ([[...]]) which makes
    # pandas build a MultiIndex and breaks plain-label column access below.
    data.columns = ['基金名称', '基金编号(序号)', '组合名称', '组合编号', '证券代码', '指令数量', '证券类别']
    data = data[data['证券类别'] == '股票']
    data['证券代码'] = data['证券代码'].map(CodeFormat().stock_code_add_postfix)
    data["交易市场内部编号"] = data['证券代码'].map(CodeFormat().get_stcok_market)

    # NOTE(review): fund_info is prepared but never used below — kept for
    # parity with the original; confirm whether it can be removed.
    fund_info = MfcData().get_mfc_fund_info()
    fund_info = fund_info.dropna(subset=['FundId'])
    # BUG FIX: call through an instance as everywhere else in this file; the
    # original unbound access (CodeFormat.stock_code_add_postfix) would fail
    # for an instance method. An instance call is safe either way.
    fund_info['FundId'] = fund_info['FundId'].map(CodeFormat().stock_code_add_postfix)
    fund_info['FundId'] = fund_info['FundId'].map(lambda x: x[0:6])

    # ---- IPO dates ----
    Stock().load_all_stock_code_now()
    Stock().load_ipo_date()
    stock = Stock().get_ipo_date()
    stock.columns = ['IpoDate', 'DelistDate']
    stock['证券代码'] = stock.index
    stock['IpoDate'] = stock['IpoDate'].map(lambda x: str(int(x)))

    # ---- keep only recently listed stocks ----
    new_stock_date = (datetime.today() - timedelta(days=self.new_days)).strftime("%Y%m%d")
    all_data = pd.merge(data, stock, on=['证券代码'], how="left")
    all_data = all_data[all_data.IpoDate > new_stock_date]

    # ---- realtime pct-change and volume of the new stocks (Wind wsq) ----
    code_list = list(set(all_data['证券代码'].values))
    code_str = ','.join(code_list)
    pct = w.wsq(code_str, "rt_pct_chg,rt_vol")
    pct = pd.DataFrame(pct.Data, columns=pct.Codes, index=['Pct', 'Vol']).T
    pct['证券代码'] = pct.index

    # trading (Vol > 0) and no longer limit-up (< 9%) -> sellable
    new_data = pd.merge(all_data, pct, on=['证券代码'], how="left")
    new_data = new_data[new_data['Vol'] > 0]
    new_data = new_data[new_data['Pct'] < 0.09]

    # ---- local and FTP output folders ----
    out_sub_path = os.path.join(self.data_path, today)
    os.makedirs(out_sub_path, exist_ok=True)  # race-free exists()+mkdir()
    ftp = MyFtp()
    ftp.connect()
    ftp_folder = os.path.join(self.ftp_path, today)
    ftp.upload_folder(ftp_folder)
    ftp.close()

    # ---- one instruction file per manager ----
    manager_data = MfcData().get_manager_fund()
    for manager_name in manager_data.columns:
        manager_fund = manager_data[manager_name].dropna()
        fund_data = new_data[new_data['基金名称'].map(lambda x: x in manager_fund.values)]
        if len(fund_data) > 0:
            warnings.filterwarnings("ignore")
            # .copy() so the column edits below do not hit a view of new_data
            fund_data_out = fund_data[['证券代码', '指令数量', '交易市场内部编号',
                                       '基金编号(序号)', '基金名称', '组合编号']].copy()
            fund_data_out['委托方向'] = 2  # 2 = sell; confirm against order spec
            fund_data_out['指令价格'] = 0
            fund_data_out['交易市场内部编号'] = fund_data_out['交易市场内部编号'].map(
                lambda x: 1 if x == 'SH' else 2)
            fund_data_out['价格模式'] = ""
            fund_data_out['当前指令市值/净值(%)'] = ""
            fund_data_out['目标市值/净值(%)'] = ""
            fund_data_out['基金名称'] = ""
            fund_data_out['证券代码'] = fund_data_out['证券代码'].map(lambda x: x[0:6])
            fund_data_out = fund_data_out[['证券代码', '委托方向', '指令数量', '指令价格',
                                           '价格模式', '交易市场内部编号', '当前指令市值/净值(%)',
                                           '目标市值/净值(%)', '基金编号(序号)', '基金名称', '组合编号']]
            file_name = manager_name + '.xls'
            out_file = os.path.join(out_sub_path, file_name)
            print(out_file)
            fund_data_out.to_excel(out_file, index=None)
            ftp = MyFtp()
            ftp.connect()
            ftp_file = os.path.join(self.ftp_path, today, file_name)
            ftp.upload_file(ftp_file, out_file)
            ftp.close()
def backtest_alpha_factor(self, factor_name):
    """Back-test one alpha factor and write the result CSV files.

    Reads the factor exposure from the h5 store (inverse-normalized if
    required), then on a weekly rebalance grid computes: IC / rank IC,
    the long-short factor return, decile group excess returns, long-top /
    short-bottom spread returns and the factor rank auto-correlation.
    Writes four CSVs (<factor>_Result / _GroupResult / _LagResult /
    _Summary) under ``path``/<factor_name>.

    :param factor_name: factor name in the h5 store; a name ending in
        'Res' switches the return series to residual returns ("PctRes").
    """
    # ---- parameters --------------------------------------------------------
    ipo_min_days = 90            # exclude stocks listed fewer trade days ago
    # factor_name = 'ROERankYOY'
    input_stock_pool = None            # optional external stock universe
    input_backtest_date_series = None  # optional external rebalance dates
    tradedays_yearly = 250
    transaction_cost = 0.0008    # currently unused; kept for future costing
    stamp_tax = 0.001            # currently unused; kept for future costing
    min_trade_volume = 0.0       # currently unused; kept for future filters
    min_free_mv = 0.0            # currently unused; kept for future filters
    need_alpha_norm_inv = True
    lead_lag_length = 50         # lag analysis is currently disabled (see below)
    group_number = 10
    beg_date = "20040101"
    end_date = "20181001"
    backtest_period = "W"
    backtest_period_days = 5
    path = r'E:\3_Data\5_stock_data\3_alpha_model\backtest_alpha'

    # ---- input data --------------------------------------------------------
    # BUG FIX: the h5 sub-path was ",y_alpha" (adjacent-key typo); "my_alpha"
    # matches the residual-return read below.
    alpha_factor = Stock().read_factor_h5(factor_name, Stock().get_h5_path("my_alpha"))
    alpha_post = factor_name[-3:]
    alpha_factor_date_series = list(alpha_factor.columns)
    if need_alpha_norm_inv:
        alpha_factor = FactorPreProcess().inv_normalization(alpha_factor)
    trade_status = Stock().read_factor_h5("TradingStatus")
    trade_status_date_series = list(trade_status.columns)
    if alpha_post != 'Res':
        stock_pct = Stock().read_factor_h5("Pct_chg")
        stock_pct_date_series = list(stock_pct.columns)
    else:
        # factors named "...Res" are evaluated against residual returns
        stock_pct = Stock().read_factor_h5("PctRes", Stock().get_h5_path("my_alpha"))
        stock_pct_date_series = list(stock_pct.columns)
    price_adjust = Stock().read_factor_h5("PriceCloseAdjust")
    price_adjust_date_series = list(price_adjust.columns)
    trade_volume = Stock().read_factor_h5("TradeVolumn")
    trade_volume_date_series = list(trade_volume.columns)
    free_mv = Stock().read_factor_h5("Mkt_freeshares")
    free_mv_date_series = list(free_mv.columns)
    ipo_days = Stock().get_ipo_date()
    ipo_days.columns = ['IpoDate', 'DelistDate']

    # ---- back-test dates: intersection of all data availability windows ----
    bt_beg_date = max(beg_date, trade_status_date_series[0], stock_pct_date_series[0],
                      alpha_factor_date_series[0], price_adjust_date_series[0],
                      trade_volume_date_series[0], free_mv_date_series[0])
    bt_end_date = min(end_date, trade_status_date_series[-1], stock_pct_date_series[-1],
                      alpha_factor_date_series[-1], price_adjust_date_series[-1],
                      trade_volume_date_series[-1], free_mv_date_series[-1])
    if input_backtest_date_series is None:
        backtest_date_series = Date().get_trade_date_series(
            bt_beg_date, bt_end_date, backtest_period)
    else:
        # external dates: restrict the caller-supplied grid to valid trade days
        backtest_date_series = Date().get_trade_date_series(
            bt_beg_date, bt_end_date, "D")
        backtest_date_series = list(
            set(input_backtest_date_series) & set(backtest_date_series))
        backtest_date_series.sort()
    backtest_date_series = set(trade_status_date_series) & set(stock_pct_date_series) & \
        set(alpha_factor_date_series) & set(price_adjust_date_series) & \
        set(backtest_date_series) & set(trade_volume_date_series) & set(free_mv_date_series)
    backtest_date_series = list(backtest_date_series)
    backtest_date_series.sort()

    # ---- per-date back-test loop -------------------------------------------
    result = pd.DataFrame([], columns=['ValDate', "BuyDate", "SellDate"],
                          index=backtest_date_series)
    # NOTE(review): the lead/lag analysis that once filled lag_result is
    # disabled (was commented out); lag_result stays empty and its CSV will
    # only contain the index.
    lag_result = pd.DataFrame([], index=backtest_date_series)
    labels = ["Gp_" + str(x) for x in range(1, group_number + 1)]
    group_result = pd.DataFrame([], columns=labels, index=backtest_date_series)

    # stop one before the end: each step needs the *next* rebalance date
    for i_date in range(0, len(backtest_date_series) - 1):
        # valuation date -> trade (buy) date -> next valuation -> sell date
        alpha_date = backtest_date_series[i_date]
        trade_date = Date().get_trade_date_offset(alpha_date, 1)
        next_alpha_date = backtest_date_series[i_date + 1]
        next_trade_date = Date().get_trade_date_offset(next_alpha_date, 1)
        print("BackTest Stock Alpha At %s" % alpha_date)

        # ---- merge the per-date cross-sections into one frame ----
        alpha_factor_date = pd.DataFrame(alpha_factor[alpha_date])
        alpha_factor_date.columns = ['Alpha']
        next_alpha_factor_date = pd.DataFrame(alpha_factor[next_alpha_date])
        next_alpha_factor_date.columns = ['NextAlpha']
        trade_status_date = pd.DataFrame(trade_status[trade_date])
        trade_status_date.columns = ['Status']
        price_adjust_date = pd.DataFrame(price_adjust[trade_date])
        price_adjust_date.columns = ['Price']
        next_price_adjust_date = pd.DataFrame(price_adjust[next_trade_date])
        next_price_adjust_date.columns = ['NextPrice']
        all_data = pd.concat([alpha_factor_date, next_alpha_factor_date,
                              trade_status_date, price_adjust_date,
                              next_price_adjust_date, ipo_days], axis=1)
        all_data = all_data.dropna()

        # ---- stock pool: tradable, seasoned, optionally externally filtered ----
        # status 0/1 taken as tradable — confirm against TradingStatus encoding
        can_trade_code = all_data['Status'].map(lambda x: x in [0, 1])
        all_data = all_data.loc[can_trade_code, :]
        the_ipo_date = Date().get_trade_date_offset(alpha_date, -ipo_min_days)
        all_data = all_data.loc[all_data['IpoDate'] < the_ipo_date, :]
        all_data = all_data.dropna()
        if input_stock_pool is None:
            stock_pool = list(all_data.index)
            stock_pool.sort()
        else:
            stock_pool = list(set(input_stock_pool) & set(list(all_data.index)))
            stock_pool.sort()
        all_data = all_data.loc[stock_pool, :]
        # holding-period return from buy date to sell date
        all_data['Pct'] = all_data['NextPrice'] / all_data['Price'] - 1.0

        # ---- factor return, IC, auto-correlation ----
        ic = all_data['Pct'].corr(all_data['Alpha'])
        rank_ic = all_data['Pct'].corr(all_data['Alpha'], method='spearman')
        all_data['AlphaStand'] = (all_data['Alpha'] - all_data['Alpha'].mean()) \
            / all_data['Alpha'].std()
        # unit-gross long-short weights proportional to standardized alpha
        all_data['Weight'] = all_data['AlphaStand'] / all_data['AlphaStand'].abs().sum()
        ls_factor_return = (all_data['Weight'] * all_data['Pct']).sum()
        # alternative regression-style estimate (computed but not persisted)
        port_alpha_exposure = (all_data['AlphaStand'] * all_data['AlphaStand']).sum()
        ls_factor_return_2 = (all_data['AlphaStand'] * all_data['Pct']).sum() \
            / port_alpha_exposure
        auto_rank_corr = all_data['NextAlpha'].corr(all_data['Alpha'], method='spearman')

        # ---- decile group excess returns ----
        all_data_sort = all_data.sort_values(by=['Alpha'], ascending=False)
        labels = ["Gp_" + str(x) for x in range(1, group_number + 1)]
        all_data_sort['Gp'] = pd.qcut(all_data_sort['Alpha'], q=group_number, labels=labels)
        all_mean = all_data_sort['Pct'].mean()
        group_result.loc[alpha_date, labels] = \
            all_data_sort.groupby(by=['Gp'])['Pct'].mean() - all_mean

        # ---- LongTopShortOtherReturn: long top decile vs short the rest ----
        all_data_sort = all_data.sort_values(by=['Alpha'], ascending=False)
        top_end_index = int(len(all_data) / group_number)
        all_data_sort['Score'] = 0.0
        all_data_sort.loc[all_data_sort.index[0:top_end_index], "Score"] = 1.0
        all_data_sort['Score'] -= all_data_sort['Score'].mean()
        all_data_sort['Score'] /= all_data_sort['Score'].abs().sum()
        long_top_short_other_return = (all_data_sort['Score'] * all_data_sort['Pct']).sum()

        # ---- ShortBottomLongOtherReturn: short bottom decile vs long the rest ----
        all_data_sort = all_data.sort_values(by=['Alpha'], ascending=True)
        top_end_index = int(len(all_data) / group_number)
        all_data_sort['Score'] = 0.0
        all_data_sort.loc[all_data_sort.index[0:top_end_index], "Score"] = -1.0
        all_data_sort['Score'] -= all_data_sort['Score'].mean()
        all_data_sort['Score'] /= all_data_sort['Score'].abs().sum()
        short_bottom_long_other_return = (all_data_sort['Score'] * all_data_sort['Pct']).sum()

        # ---- write the per-date row ----
        result.loc[alpha_date, "ValDate"] = alpha_date
        result.loc[alpha_date, "BuyDate"] = trade_date
        result.loc[alpha_date, "SellDate"] = next_trade_date
        result.loc[alpha_date, "RankIC"] = rank_ic
        result.loc[alpha_date, "IC"] = ic
        result.loc[alpha_date, "LSFactorReturn"] = ls_factor_return
        result.loc[alpha_date, 'AutoRankCorr'] = auto_rank_corr
        result.loc[alpha_date, 'StockNumber'] = len(all_data)
        result.loc[alpha_date, 'StdPct'] = all_data['Pct'].std()
        result.loc[alpha_date, 'ShortBottomLongOtherReturn'] = short_bottom_long_other_return
        result.loc[alpha_date, 'LongTopShortOtherReturn'] = long_top_short_other_return
        # identity for reference: LSFactorReturn = IC*std(AlphaStand)*std(StdPct)*(N-1)

    # ---- loop done: cumulative series, summary, CSV output ------------------
    result['CumLSFactorReturn'] = result["LSFactorReturn"].cumsum()
    result['CumRankIC'] = result["RankIC"].cumsum()
    result['CumShortBottomLongOtherReturn'] = result["ShortBottomLongOtherReturn"].cumsum()
    result['CumLongTopShortOtherReturn'] = result["LongTopShortOtherReturn"].cumsum()
    group_result_cumsum = group_result.cumsum()
    lag_result_cumsum = lag_result.cumsum()

    summary = pd.DataFrame([], columns=['Summary'])
    # annualize by the number of rebalance periods per year
    year_factor_return = result['LSFactorReturn'].mean() \
        * tradedays_yearly / backtest_period_days
    year_factor_std = result['LSFactorReturn'].std() \
        * np.sqrt(tradedays_yearly / backtest_period_days)
    ic_mean = result['RankIC'].mean()
    ic_std = result['RankIC'].std()
    mean_antocorr = result['AutoRankCorr'].mean()

    summary.loc["YearFactorReturn", 'Summary'] = year_factor_return
    summary.loc["YearFactorStd", 'Summary'] = year_factor_std
    summary.loc["YearFactorIR", 'Summary'] = year_factor_return / year_factor_std
    summary.loc["ICMean", 'Summary'] = ic_mean
    summary.loc["ICstd", 'Summary'] = ic_std
    summary.loc["ICIR", 'Summary'] = ic_mean / ic_std
    summary.loc["AntoCorr", 'Summary'] = mean_antocorr

    sub_path = os.path.join(path, factor_name)
    if not os.path.exists(sub_path):
        os.makedirs(sub_path)
    result.to_csv(os.path.join(sub_path, factor_name + '_Result.csv'))
    group_result_cumsum.to_csv(os.path.join(sub_path, factor_name + '_GroupResult.csv'))
    lag_result_cumsum.to_csv(os.path.join(sub_path, factor_name + '_LagResult.csv'))
    summary.to_csv(os.path.join(sub_path, factor_name + '_Summary.csv'))