def get_stock_characteristic_size(self):
    """
    Split the fund's stock holdings by index membership on the last trade date.

    The single column of ``self.fund_hold_stock`` is used as the holding
    weight. Holdings are matched against the 000300.SH and 000905.SH
    constituent weights for ``self.last_trade_date``; the weight of the
    matched holdings is divided by the total holding weight, and whatever is
    left over is reported as the remainder.

    Returns:
        pd.DataFrame: rows '沪深300成分股权重', '中证500成分股权重' and
        '其他成分股权重'; columns '数值' (the share), '持股特征(<date>)'
        (constant '市值分布') and '具体表现' (a copy of the row label).
    """
    date = self.last_trade_date
    holdings = self.fund_hold_stock.copy()
    holdings.columns = ['FundWeight']
    total_weight = holdings['FundWeight'].sum()

    def matched_share(index_code, weight_label):
        # Share of the total holding weight sitting in rows that have a value
        # both in the holdings and in the index weights for `date`.
        index_weight = Index().get_weight_date(index_code=index_code, date=date)
        index_weight.columns = [weight_label]
        both = pd.concat([holdings, index_weight], axis=1).dropna()
        return both['FundWeight'].sum() / total_weight

    stock_300_weight = matched_share("000300.SH", '300Weight')
    stock_500_weight = matched_share("000905.SH", '500Weight')
    # Whatever was not matched to either index.
    other_weight = 1 - stock_300_weight - stock_500_weight

    stock_characteristic_size = pd.DataFrame(
        [stock_300_weight, stock_500_weight, other_weight],
        index=['沪深300成分股权重', '中证500成分股权重', '其他成分股权重'],
        columns=['数值'])
    stock_characteristic_size['持股特征(%s)' % self.last_trade_date] = '市值分布'
    stock_characteristic_size['具体表现'] = stock_characteristic_size.index
    return stock_characteristic_size
def get_benchmark_weight_date(self, date):
    """
    Fetch the constituent weights of the benchmark index for one date.

    Args:
        date: passed through unchanged to ``Index().get_weight_date``.

    Returns:
        The object returned for ``self.benchmark_code``, with its columns
        relabelled to the single name 'BenchWeight'.
    """
    weights = Index().get_weight_date(date=date, index_code=self.benchmark_code)
    weights.columns = ['BenchWeight']
    return weights
def get_index_pct(self):
    """
    Return the close and period-over-period change of ``self.index_code``.

    Data is requested for ``self.beg_date_pre`` .. ``self.end_date``. A 'PCT'
    column is derived from 'CLOSE' with ``pct_change`` and rows containing
    NaN (at least the first one, which has no previous close) are dropped.

    Returns:
        The frame with columns renamed to '指数收盘' and '指数涨跌幅'.
    """
    frame = Index().get_index_factor(self.index_code, self.beg_date_pre, self.end_date)
    frame['PCT'] = frame['CLOSE'].pct_change()
    # The two-label rename only succeeds when 'CLOSE' is the sole column
    # returned by the data layer.
    frame.columns = ['指数收盘', '指数涨跌幅']
    return frame.dropna()
def cal_index_excess_pct(self):
    """
    Compute the excess return of 000985.CSI over the risk-free rate and save it.

    The last close row is discarded before returns are taken, returns are
    scaled by 100, and the 'RiskFreeRate' series is subtracted by index
    label. The result (column 'Market' plus its running sum 'CumSumReturn')
    is written to ``<data_path>/factor_return/FactorReturn_Market.csv``.
    Nothing is returned.
    """
    name = "Market"

    closes = Index().get_index_factor("000985.CSI", attr=['CLOSE'])
    returns = closes.iloc[0:-1, :].pct_change()
    returns.columns = ['StockIndexReturn']
    scaled_returns = returns['StockIndexReturn'] * 100

    risk_free = Macro().get_daily_risk_free_rate()['RiskFreeRate']

    excess = scaled_returns.sub(risk_free, axis='index').dropna()
    frame = pd.DataFrame(excess)
    frame.columns = [name]
    frame['CumSumReturn'] = frame[name].cumsum()

    target = os.path.join(self.data_path, 'factor_return',
                          'FactorReturn_%s.csv' % name)
    frame.to_csv(target)
def filter_fund_pool(self, index_code, index_name, end_date, track_error_up):
    """
    Select funds that track one index closely and export the pool to Excel.

    Steps:
      1. Take the window from 250 trade-date offsets before ``end_date`` up
         to ``end_date``.
      2. For every fund compute the count and the annualised (sqrt(250))
         standard deviation of its return minus the index return.
      3. Keep funds with more than ``self.data_min_length`` observations and
         tracking error below ``track_error_up``.
      4. Join fund info and keep funds whose benchmark text contains
         ``index_name``, whose setup date precedes the window start, whose
         full name contains neither "交易型开放式指数" nor "联接", and which
         ``Fund().if_a_fund`` labels 'A类基金'.
      5. Write the table to
         ``<data_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``.

    Args:
        index_code: index code, e.g. '000300.SH'.
        index_name: index name searched for in the benchmark text, e.g. '沪深300'.
        end_date: last date of the evaluation window, e.g. "20181231".
        track_error_up: exclusive upper bound on tracking error, e.g. 0.03.
    """
    beg_date = Date().get_trade_date_offset(end_date, -250)

    # Load fund NAVs and the index close.
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_close.columns = [index_code]
    result = pd.DataFrame([], index=fund_nav.columns, columns=['跟踪误差', '数据长度'])

    # Align NAVs on the index dates. reindex() yields NaN rows for dates the
    # NAV table lacks, whereas .loc with a full label list raises on them.
    fund_nav = fund_nav.reindex(index_close.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')
    fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
    result.loc[:, "数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Screen on data length and tracking error.
    result = result.dropna()
    result = result[result['数据长度'] > self.data_min_length]
    result = result[result['跟踪误差'] < track_error_up]

    # Attach basic fund info and screen on it.
    data_pd = Fund().get_wind_fund_info()
    data_pd = data_pd[['BenchMark', 'Name', 'FullName', 'SetupDate', 'InvestType']]
    data_pd.columns = ['基金基准', '基金简称', '基金全称', '上市日期', '基金类型']
    data = pd.concat([data_pd, result], axis=1)
    data = data.dropna()
    data = data[data["基金基准"].map(lambda x: index_name in x)]
    data = data[data["上市日期"] < beg_date]
    data = data[data["基金全称"].map(lambda x: "交易型开放式指数" not in x)]
    data = data[data["基金全称"].map(lambda x: "联接" not in x)]
    data['A类基金'] = data['基金简称'].map(Fund().if_a_fund)
    data = data[data['A类基金'] == 'A类基金']

    # Export.
    out_path = os.path.join(self.data_path, "filter_fund_pool")
    file_name = os.path.join(out_path, '基金最终筛选池_' + index_name + '.xlsx')
    sheet_name = "基金筛选池"

    num_format_pd = pd.DataFrame([], columns=data.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', '跟踪误差'] = '0.00%'
    num_format_pd.loc['format', '数据长度'] = '0'

    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(data, worksheet, begin_row_number=0, begin_col_number=1,
                       num_format_pd=num_format_pd, color="red", fillna=True)
    # Close the workbook explicitly so the file is finalised.
    excel.close()
def calculate_fund_factor(self, index_code, index_name, end_date):
    """
    Compute tracking error, excess return and information ratio per fund.

    Reads the pool written to
    ``<data_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``, keeps
    funds set up before the window start and with net assets above 0.45
    (after dividing the downloaded value by 1e8), evaluates every fund
    against ``index_code`` over the window ending at ``end_date`` and writes
    the table, sorted by information ratio, to
    ``<data_path>/cal_fund_factor/<index_name>/基金指标_<index_name>_<end_date>.xlsx``.

    Args:
        index_code: index code, e.g. '000905.SH'.
        index_name: index name used in file names, e.g. '中证500'.
        end_date: last date of the window, e.g. '20151231'. The window starts
            ``self.data_length`` trade-date offsets earlier.
    """
    beg_date = Date().get_trade_date_offset(end_date, -self.data_length)

    # Load the fund pool, fund NAVs and the index close.
    file = os.path.join(self.data_path, 'filter_fund_pool',
                        '基金最终筛选池_' + index_name + '.xlsx')
    # NOTE(review): recent pandas versions may reject the `encoding` keyword
    # of read_excel - confirm against the pandas version in use.
    fund_code = pd.read_excel(file, index_col=[1], encoding='gbk')
    fund_code['上市日期'] = fund_code['上市日期'].map(str)

    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_close.columns = [index_code]

    # Drop funds set up inside the window, then download fund size.
    fund_code = fund_code.loc[:, ['上市日期', '基金全称', '基金简称']]
    fund_code = fund_code[fund_code['上市日期'] < beg_date]

    fund_code_str = ','.join(fund_code.index)
    fund_asset = w.wss(fund_code_str, "netasset_total",
                       "unit=1;tradeDate=" + str(end_date))
    fund_asset = pd.DataFrame(fund_asset.Data, index=['基金规模'],
                              columns=fund_asset.Codes).T
    fund_asset['基金规模'] /= 100000000.0
    fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
    fund_asset = fund_asset[fund_asset['基金规模'] > 0.45]
    fund_info = pd.concat([fund_code, fund_asset], axis=1)
    fund_info = fund_info.dropna()

    # Per-fund statistics over the window.
    result = pd.DataFrame([], index=fund_code.index, columns=['跟踪误差'])
    # reindex() reproduces the old .ix behaviour: dates or funds missing from
    # the NAV table become NaN instead of raising.
    fund_nav = fund_nav.reindex(index=index_close.index, columns=fund_code.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')

    fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
    fund_nav_period = fund_nav.loc[beg_date:end_date, :]
    index_close_period = index_close.loc[beg_date:end_date, :]

    result.loc[:, "数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Compound daily returns through logs: cumsum() skips NaN days, so a gap
    # in the NAV history does not wipe out the whole-period return.
    log_growth = np.log(fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
    fund_return = np.exp(log_growth) - 1

    # NAVs were aligned on the index dates above, so both windows share the
    # same rows and the last row is the window end.
    last_date_close = index_close_period.iloc[-1, :]
    first_date_close = index_close_period.iloc[0, :]

    result.loc[:, "基金涨跌"] = fund_return
    result.loc[:, "指数涨跌"] = (last_date_close / first_date_close - 1.0).values[0]
    result.loc[:, "超额收益"] = result.loc[:, "基金涨跌"] - result.loc[:, "指数涨跌"]
    result.loc[:, "信息比率"] = result.loc[:, "超额收益"] / result.loc[:, "跟踪误差"]

    result = result[result['数据长度'] > self.data_min_length]
    result = pd.concat([fund_info, result], axis=1)
    result = result.sort_values(by=['信息比率'], ascending=False)
    result = result.dropna()

    # Export to Excel.
    out_path = os.path.join(self.data_path, "cal_fund_factor", index_name)
    file_name = os.path.join(
        out_path, '基金指标_' + index_name + '_' + end_date + '.xlsx')

    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', '数据长度'] = '0'
    num_format_pd.loc['format', '信息比率'] = '0.00'
    num_format_pd.loc['format', '基金规模'] = '0.00'

    sheet_name = "基金指标"
    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(result, worksheet, begin_row_number=0, begin_col_number=0,
                       num_format_pd=num_format_pd, color="red", fillna=True)
    # Close the workbook explicitly so the file is finalised.
    excel.close()
def AttributeMfctedaFund(index_code_ratio, fund_code, index_code, fund_name,
                         beg_date, end_date, fund_id, path, type, mg_fee_ratio):
    """
    Decompose a fund's daily NAV change over a period and export it to Excel.

    Each day's total P&L is taken as ``previous net asset value * fund
    return`` and split into stock P&L, new-stock P&L, fees, intraday-trading
    P&L and a residual ("fixed income and other"). Stock P&L is further split
    into benchmark, excess, timing and selection parts. Two workbooks are
    written under ``<path>/<fund_name>/整体``: the daily table ("归因_...")
    and the period summary ("归因汇总_..."). Nothing is returned.

    Args:
        index_code_ratio: benchmark equity weight used for the benchmark and
            timing P&L, e.g. 0.95.
        fund_code: fund code, used to load returns when ``type`` is not "专户".
        index_code: benchmark index code, e.g. '000905.SH'.
        fund_name: fund name; used for data lookups, the output folder, file
            names and the sheet name.
        beg_date: period start, e.g. '20180101'.
        end_date: period end, e.g. '20180731'.
        fund_id: id passed to ``MfcData().get_fund_asset_period``.
        path: root directory passed to the daily calculators and used for output.
        type: "专户" loads the adjusted NAV by fund name; any other value
            loads public-fund returns by fund code.
        mg_fee_ratio: fee ratio forwarded to ``CalStockReturnDaily``.
    """
    # Fund return series.
    beg_date = Date().get_trade_date_offset(beg_date, -0)

    if type == "专户":
        fund_pct = MfcData().get_fund_nav_adjust(
            fund_name, Date().get_trade_date_offset(beg_date, -2), end_date)
        fund_pct['基金涨跌幅'] = fund_pct['累计复权净值'].pct_change()
    else:
        fund_pct = MfcData().get_mfcteda_public_fund_pct_wind(
            fund_code, beg_date, end_date)
        fund_pct.columns = ['基金涨跌幅']

    # Index return and holdings data.
    # NAV = stocks + bonds + funds + repo + cash + receivables - payables,
    # where receivables/payables are subscriptions, redemptions, accrued and
    # trading fees not yet settled to cash.
    # The NAV itself is not split day by day; instead each day's total
    # floating P&L (previous NAV * fund return of the day) is split.
    index_pct = Index().get_index_factor(
        index_code, Date().get_trade_date_offset(beg_date, -1), end_date,
        ['CLOSE'])
    index_pct = index_pct.pct_change()
    index_pct.columns = ['指数涨跌幅']

    fund_asset = MfcData().get_fund_asset_period(fund_id, beg_date, end_date)
    close_unadjust = Stock().get_factor_h5("Price_Unadjust", None, "primary_mfc")
    adjust_factor = Stock().get_factor_h5("AdjustFactor", None, "primary_mfc")
    # Keep the reported stock asset; '股票资产' is overwritten in the loop below.
    fund_asset['股票资产-汇总'] = fund_asset['股票资产']

    data = pd.concat([fund_pct, index_pct, fund_asset], axis=1)
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])
    data['昨日净值'] = data['净值'].shift(1)
    data['昨日基金份额'] = data['基金份额'].shift(1)
    data['昨日单位净值'] = data['单位净值'].shift(1)

    # Daily new-stock and stock P&L, once per cal_type ("close" / "average").
    for date in Date().get_trade_date_series(beg_date, end_date):
        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            cal_type="close")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            mg_fee_ratio, cal_type="close")
        data.loc[date, '新股资产'] = new_stock_asset
        data.loc[date, '股票资产'] = stock_asset
        data.loc[date, '新股盈亏'] = new_stock_return
        data.loc[date, '股票盈亏'] = stock_return
        data.loc[date, '管理托管费用'] = mg_fee
        data.loc[date, '交易印花费用'] = trade_fee

        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            cal_type="average")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            mg_fee_ratio, cal_type="average")
        data.loc[date, '新股盈亏-TradePrice'] = new_stock_return
        data.loc[date, '股票盈亏-TradePrice'] = stock_return

    # Assigning by date above can add rows for dates without fund or index
    # returns; drop them again.
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])
    data[['新股盈亏', '当日股票总盈亏金额']] = data[['新股盈亏', '当日股票总盈亏金额']].fillna(0.0)
    data = data[data['股票资产'] > 0.0]
    data['股票仓位'] = data['股票资产'] / data['净值']
    data['昨日股票仓位'] = data['股票仓位'].shift(1)

    # Asset P&L = stock + new stock + bonds/other + management/custody fee
    #             + trading/stamp fee
    cols = ['管理托管费用', '交易印花费用', '股票盈亏', '新股盈亏']
    data[cols] = data[cols].fillna(0.0)
    data['汇总盈亏'] = (data['管理托管费用'] + data['交易印花费用']
                    + data['股票盈亏'] + data['新股盈亏'])
    data['日内交易盈亏'] = (data['股票盈亏'] - data['股票盈亏-TradePrice']
                      + data['新股盈亏'] - data['新股盈亏-TradePrice'])
    data['资产盈亏'] = data['基金涨跌幅'] * data['昨日净值']
    data['固收其他盈亏'] = data['资产盈亏'] - data['汇总盈亏'] - data['日内交易盈亏']
    data['昨日股票资产'] = data['股票资产'].shift(1)
    data['股票涨跌幅'] = data['股票盈亏'] / data['昨日股票资产']

    # Stock P&L = benchmark P&L + excess P&L
    data['基准盈亏'] = data['指数涨跌幅'] * data['昨日净值'] * index_code_ratio
    data['超额盈亏'] = (data['昨日股票仓位'] * data['股票涨跌幅'] * data['昨日净值']
                    - data['基准盈亏'])

    # Excess P&L = timing (asset allocation) + stock selection
    data['择时盈亏'] = ((data['昨日股票仓位'] - index_code_ratio)
                    * data['指数涨跌幅'] * data['昨日净值'])
    data['选股盈亏'] = (data['昨日股票仓位']
                    * (data['股票涨跌幅'] - data['指数涨跌幅']) * data['昨日净值'])
    data['全仓选股盈亏'] = (data['股票涨跌幅'] - data['指数涨跌幅']) * data['昨日净值']

    # Express every component per fund share. The creation order is kept
    # because it fixes the column layout of the exported sheet.
    for component in ['资产盈亏', '管理托管费用', '交易印花费用', '股票盈亏',
                      '新股盈亏', '固收其他盈亏', '日内交易盈亏', '基准盈亏',
                      '超额盈亏', '择时盈亏', '选股盈亏', '全仓选股盈亏']:
        data['净值-' + component] = data[component] / data['昨日基金份额']

    index = [
        '净值-资产盈亏', '净值-股票盈亏', '净值-新股盈亏', '净值-固收其他盈亏',
        '净值-日内交易盈亏', '净值-管理托管费用', '净值-交易印花费用', '净值-基准盈亏',
        '净值-超额盈亏', '净值-择时盈亏', '净值-选股盈亏', '净值-全仓选股盈亏'
    ]

    # Convert to percentage returns relative to the starting unit NAV.
    data = data.dropna(subset=['昨日单位净值'])
    nav = data.loc[data.index[0], '昨日单位净值']
    pct = data['净值-资产盈亏'].sum() / nav

    result = pd.DataFrame([], columns=['净值变化', '百分比', '收益率'], index=index)
    result.loc[index, '净值变化'] = data.loc[:, index].sum()
    result.loc[index, '百分比'] = (result.loc[index, '净值变化']
                                / result.loc['净值-资产盈亏', '净值变化'])
    result.loc[index, '收益率'] = result.loc[index, '百分比'] * pct

    # Annualised return, average stock position, first and last date.
    result.index = [
        '基金整体', '股票部分', '新股部分', '固收+其他部分', "日内交易部分", '管理托管',
        '交易印花', '股票基准', '股票超额', '股票择时', '股票选股', '全仓股票选股'
    ]
    days = (datetime.strptime(end_date, '%Y%m%d')
            - datetime.strptime(beg_date, '%Y%m%d')).days
    result.loc[:, '年化收益'] = result.loc[:, '收益率'].map(
        lambda x: (x + 1) ** (365 / days) - 1.0)
    result.loc['股票仓位', :] = data['股票仓位'].mean()
    result.loc['开始时间', :] = data.index[0]
    result.loc['结束时间', :] = data.index[-1]

    # Both workbooks go to the same folder and share the period tag.
    save_path = os.path.join(path, fund_name, "整体")
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    period_tag = str(data.index[0]) + '_' + str(data.index[-1])

    def write_sheet(frame, prefix, number_formats):
        # Write one frame to its own workbook; the sheet is named after the fund.
        file_name = os.path.join(
            save_path, prefix + fund_name + '_' + period_tag + ".xlsx")
        excel = WriteExcel(file_name)
        worksheet = excel.add_worksheet(fund_name)
        excel.write_pandas(frame, worksheet, begin_row_number=0,
                           begin_col_number=1, num_format_pd=number_formats,
                           color="red", fillna=True)
        excel.close()

    # Daily decomposition.
    num_format_pd = pd.DataFrame([], columns=data.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format',
                      ['基金涨跌幅', '指数涨跌幅', '股票仓位', '昨日股票仓位',
                       '股票涨跌幅']] = '0.00%'
    num_format_pd.loc['format', [
        '单位净值', '昨日单位净值', '净值-管理托管费用', '净值-交易印花费用', '净值-股票盈亏',
        '净值-新股盈亏', '净值-固收其他盈亏', '净值-基准盈亏', '净值-择时盈亏', '净值-选股盈亏',
        '净值-资产盈亏', '净值-全仓选股盈亏'
    ]] = '0.0000'
    write_sheet(data, "归因_", num_format_pd)

    # Period summary.
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', ['净值变化']] = '0.0000'
    write_sheet(result, "归因汇总_", num_format_pd)
def back_test_timing_factor(self, factor_name, index_code):
    """
    Back-test a timing signal against the next-period return of an index.

    The exposure returned by ``self.get_factor_exposure(factor_name)`` must
    provide the columns 'RawTimer' and 'Timer'. Each row's 'Timer' value is
    multiplied by the following row's index return to form short-only,
    long-only and long-short return series. Outputs:

      * ``<data_path>/factor_picture/<factor_name>fig.png`` - cumulative
        (summed) return curves;
      * ``<data_path>/factor_backtest/<factor_name>_Result.csv`` - summary
        statistics (correlations, mean gain/loss, signal counts, win rates,
        profit/loss ratios);
      * ``<data_path>/factor_backtest/<factor_name>_Return.csv`` - the full
        working table.

    Win rates are NaN when the signal never takes the corresponding side
    (for example a long-only timer has no short win rate).

    Args:
        factor_name: name of the timing factor; also used in file names and
            as the column label of the result table.
        index_code: code of the index whose close is used for returns.
    """
    data = self.get_factor_exposure(factor_name)
    index_pct = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_pct = index_pct.pct_change()
    index_pct.columns = ['IndexReturn']

    data = pd.concat([data, index_pct['IndexReturn']], axis=1)
    data = data.dropna(subset=['RawTimer', 'Timer', 'IndexReturn'])

    # The signal on a row is applied to the return of the following row.
    data['IndexNextReturn'] = data['IndexReturn'].shift(-1)
    data['LongTimer'] = data['Timer'].map(lambda x: x if x >= 0 else 0)
    data['ShortTimer'] = data['Timer'].map(lambda x: x if x <= 0 else 0)

    data['SPortNextReturn'] = data['IndexNextReturn'] * data['ShortTimer']
    data['LPortNextReturn'] = data['IndexNextReturn'] * data['LongTimer']
    data['LSPortNextReturn'] = data['IndexNextReturn'] * data['Timer']

    data['SPortCumReturn'] = data['SPortNextReturn'].cumsum()
    data['LPortCumReturn'] = data['LPortNextReturn'].cumsum()
    data['LSPortCumReturn'] = data['LSPortNextReturn'].cumsum()
    data['IndexCumReturn'] = data['IndexReturn'].cumsum()

    col_output = [
        "SPortCumReturn", "LPortCumReturn", "LSPortCumReturn", "IndexCumReturn"
    ]
    ax = data[col_output].plot()
    fig = ax.get_figure()
    file = os.path.join(self.data_path, 'factor_picture',
                        factor_name + 'fig.png')
    fig.savefig(file)

    timer = data['Timer']
    next_return = data['IndexNextReturn']
    is_long = timer > 0
    is_short = timer < 0
    is_flat = timer == 0
    # Comparisons against NaN are False, so the last row (which has no next
    # return) never counts as a gain or a loss.
    long_gain = is_long & (next_return > 0)
    long_loss = is_long & (next_return <= 0)
    short_loss = is_short & (next_return >= 0)
    short_gain = is_short & (next_return < 0)

    pos_corr = next_return.corr(timer)
    raw_corr = next_return.corr(data['RawTimer'])

    mean_zero = next_return[is_flat].mean()
    mean_positive_profit = next_return[long_gain].mean()
    mean_positive_loss = next_return[long_loss].mean()
    # Short-side figures are negated so that a profit reads as positive.
    mean_negative_loss = -next_return[short_loss].mean()
    mean_negative_profit = -next_return[short_gain].mean()

    number_positive = int(is_long.sum())
    number_negative = int(is_short.sum())
    number_zero = int(is_flat.sum())
    number_positive_profit = int(long_gain.sum())
    number_negative_profit = int(short_gain.sum())

    def win_rate(wins, signals):
        # Plain int division would raise ZeroDivisionError for a signal that
        # never takes this side; report NaN instead.
        return wins / signals if signals else np.nan

    positive_wining_ratio = win_rate(number_positive_profit, number_positive)
    positive_profit_loss_ratio = -mean_positive_profit / mean_positive_loss
    negative_wining_ratio = win_rate(number_negative_profit, number_negative)
    negative_profit_loss_ratio = -mean_negative_profit / mean_negative_loss

    result = pd.DataFrame([], columns=[factor_name])
    summary = [
        ('开始时间', data.index[0]),
        ('结束时间', data.index[-1]),
        ('仓位相关系数', pos_corr),
        ('原始相关系数', raw_corr),
        ('多头收益', mean_positive_profit),
        ('多头损失', mean_positive_loss),
        ('空头收益', mean_negative_profit),
        ('空头损失', mean_negative_loss),
        ('空仓收益', mean_zero),
        ('多头信号数量', number_positive),
        ('空头信号数量', number_negative),
        ('空仓信号数量', number_zero),
        ('多头胜率', positive_wining_ratio),
        ('多头盈亏比', positive_profit_loss_ratio),
        ('空头胜率', negative_wining_ratio),
        ('空头盈亏比', negative_profit_loss_ratio),
    ]
    for label, value in summary:
        result.loc[label, factor_name] = value

    file = os.path.join(self.data_path, 'factor_backtest',
                        factor_name + '_Result.csv')
    result.to_csv(file)

    file = os.path.join(self.data_path, 'factor_backtest',
                        factor_name + '_Return.csv')
    data.to_csv(file)
def cal_factor_exposure(self, beg_date="20060101", end_date=None):
    """
    Compute beta and residual-return exposures by rolling weighted regression.

    For every date and every stock, the stock's return in excess of the
    risk-free rate is regressed (``sm.WLS`` with an intercept) on the excess
    return of 000985.CSI over a trailing window of 252 trade-date offsets,
    using exponentially decaying weights with a half-life of 63. The slope is
    stored as beta and the last residual as the residual return. Stocks with
    40 or fewer complete observations in the window are skipped for that date.

    Saved via ``self.save_risk_factor_exposure``: raw beta under
    ``self.raw_factor_name``, the residual return under
    ``self.raw_res_pct_factor_name``, and beta after MAD extreme-value
    removal and standardisation under ``self.factor_name``.

    Args:
        beg_date: first calculation date, format "YYYYMMDD".
        end_date: last calculation date, format "YYYYMMDD". ``None`` (the
            default) means today, resolved on every call rather than once
            when the function is defined.
    """
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    # Parameters.
    term = 252
    half_life = 63
    min_periods = 40

    # Load data.
    pct = Stock().read_factor_h5("Pct_chg")
    pct = Stock().replace_suspension_with_nan(pct)
    pct = Stock().fillna_with_mad_market(pct)
    pct = pct.T

    index_pct = Index().get_index_factor("000985.CSI", attr=['PCT']) * 100
    index_pct.columns = ['Index']
    risk_free_rate = Macro().get_daily_risk_free_rate()

    # Only dates available in every input are calculated.
    date_series = sorted(
        set(pct.index)
        & set(Date().get_trade_date_series(beg_date, end_date))
        & set(index_pct.index)
        & set(risk_free_rate.index))

    beta = pd.DataFrame([], columns=date_series, index=pct.columns)
    res_pct = pd.DataFrame([], columns=date_series, index=pct.columns)

    for current_date in date_series:
        print('Calculating Barra Risk factor %s at date %s' %
              (self.factor_name, current_date))
        data_beg_date = Date().get_trade_date_offset(current_date, -(term - 1))
        pct_before = pct.loc[data_beg_date:current_date, :]
        index_pct_before = index_pct.loc[data_beg_date:current_date, :]
        risk_free_rate_before = risk_free_rate.loc[data_beg_date:current_date, :]

        for stock_code in pct_before.columns:
            pct_before_stock = pd.DataFrame(pct_before[stock_code])
            concat_data = pd.concat(
                [index_pct_before, risk_free_rate_before, pct_before_stock],
                axis=1)
            concat_data = concat_data.dropna()

            weight = pd.DataFrame(
                TimeSeriesWeight().exponential_weight(len(concat_data), half_life),
                index=concat_data.index, columns=['Weight'])
            concat_data['Weight'] = weight
            concat_data['ones'] = 1.0
            # Work with returns in excess of the risk-free rate.
            concat_data["Index"] -= concat_data["RiskFreeRate"]
            concat_data[stock_code] -= concat_data["RiskFreeRate"]

            if len(concat_data) > min_periods:
                x = concat_data[['ones', "Index"]].values
                y = concat_data[stock_code].values
                model = sm.WLS(y, x, weights=concat_data['Weight'].values).fit()
                res_series = y - model.fittedvalues
                # params[0] is the intercept, params[1] the slope on the index.
                beta.loc[stock_code, current_date] = model.params[1]
                res_pct.loc[stock_code, current_date] = res_series[-1]
                print(stock_code, current_date, model.params[1], res_series[-1])

    # Save, dropping dates for which nothing was computed.
    beta = beta.T.dropna(how='all').T
    res_pct = res_pct.T.dropna(how='all').T
    self.save_risk_factor_exposure(beta, self.raw_factor_name)
    self.save_risk_factor_exposure(res_pct, self.raw_res_pct_factor_name)
    beta = FactorPreProcess().remove_extreme_value_mad(beta)
    beta = FactorPreProcess().standardization(beta)
    self.save_risk_factor_exposure(beta, self.factor_name)
def calculate_fund_factor_date(date, index_code, index_name, out_path):
    """
    Compute fund statistics for the year before and the half year after a date.

    Reads ``<out_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``,
    keeps funds listed more than a year before ``date`` whose net assets
    exceed 0.5 (after dividing the downloaded value by 1e8), and for each
    fund computes observation count, tracking error, fund return, index
    return, excess return and information ratio for both windows. The table
    is written to
    ``<out_path>/cal_fund_factor/<index_name>/基金指标_<index_name>_<date>.xlsx``.

    Args:
        date: evaluation date in "YYYYMMDD" format, e.g. '20151231'.
        index_code: index code, e.g. '000905.SH'.
        index_name: index name used in file names, e.g. '中证500'.
        out_path: root directory for input and output.

    Returns:
        bool: always True.
    """
    # Dates.
    min_period = 200
    date_cur = datetime.strptime(date, "%Y%m%d")
    date_cur_int = date_cur.strftime('%Y%m%d')
    # DateOffset clamps Feb 29 to Feb 28, where datetime(year - 1, ...) raises.
    one_year_before = date_cur - pd.tseries.offsets.DateOffset(years=1)
    half_year_after = date_cur + pd.tseries.offsets.DateOffset(months=6, days=0)
    # Dashed format, used only for the listing-date comparison below.
    date_bef_1y = one_year_before.strftime("%Y-%m-%d")
    # Slice bounds use the same "YYYYMMDD" format as `date`, so that both ends
    # of a label slice compare consistently on a string-labelled index.
    # NOTE(review): assumes the NAV/index data is indexed by "YYYYMMDD"
    # strings or by timestamps - confirm against the data layer.
    window_beg = one_year_before.strftime("%Y%m%d")
    window_end = half_year_after.strftime("%Y%m%d")

    # Load the fund pool, fund NAVs and the index close.
    file = os.path.join(out_path, 'filter_fund_pool',
                        '基金最终筛选池_' + index_name + '.xlsx')
    # NOTE(review): recent pandas versions may reject the `encoding` keyword
    # of read_excel - confirm against the pandas version in use.
    fund_code = pd.read_excel(file, index_col=[1], encoding='gbk')

    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE'])
    index_close.columns = [index_code]

    # Drop recently listed funds, then download fund size.
    fund_code = fund_code.loc[:, ['上市日期', '基金简称']]
    fund_code = fund_code[fund_code['上市日期'] < date_bef_1y]

    fund_code_str = ','.join(fund_code.index)
    fund_asset = w.wss(fund_code_str, "netasset_total",
                       "unit=1;tradeDate=" + str(date))
    fund_asset = pd.DataFrame(fund_asset.Data, index=['基金规模'],
                              columns=fund_asset.Codes).T
    fund_asset['基金规模'] /= 100000000.0
    fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
    fund_asset = fund_asset[fund_asset['基金规模'] > 0.5]
    fund_info = pd.concat([fund_code, fund_asset], axis=1)
    fund_info = fund_info.dropna()

    # Statistics for the trailing year.
    result = pd.DataFrame([], index=fund_code.index, columns=['最近1年跟踪误差'])
    # reindex() reproduces the old .ix behaviour: dates or funds missing from
    # the NAV table become NaN instead of raising.
    fund_nav = fund_nav.reindex(index=index_close.index, columns=fund_code.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')

    fund_excess_pct_period = fund_excess_pct.loc[window_beg:date, :]
    fund_nav_period = fund_nav.loc[window_beg:date, :]
    index_close_period = index_close.loc[window_beg:date, :]

    result.loc[:, "最近1年数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "最近1年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Compound daily returns through logs; cumsum() skips NaN days.
    log_growth = np.log(fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
    fund_return = np.exp(log_growth) - 1
    last_date_close = index_close_period.iloc[-1, :]
    first_date_close = index_close_period.iloc[0, :]

    result.loc[:, "最近1年基金涨跌"] = fund_return
    result.loc[:, "最近1年指数涨跌"] = (last_date_close / first_date_close - 1.0).values[0]
    result.loc[:, "最近1年超额收益"] = (result.loc[:, "最近1年基金涨跌"]
                                  - result.loc[:, "最近1年指数涨跌"])
    result.loc[:, "最近1年信息比率"] = (result.loc[:, "最近1年超额收益"]
                                  / result.loc[:, "最近1年跟踪误差"])
    # copy() so that the columns added below go to an independent frame.
    result = result[result['最近1年数据长度'] > min_period].copy()

    # Statistics for the following half year.
    fund_excess_pct_period = fund_excess_pct.loc[date:window_end, :]
    fund_nav_period = fund_nav.loc[date:window_end, :]
    index_close_period = index_close.loc[date:window_end, :]

    result.loc[:, "之后半年数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "之后半年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # The forward window is empty when `date` is the latest available date;
    # positional access then raises IndexError and the figures stay NaN.
    try:
        log_growth = np.log(fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
        result.loc[:, "之后半年基金涨跌"] = np.exp(log_growth) - 1
    except IndexError:
        result.loc[:, "之后半年基金涨跌"] = np.nan

    try:
        last_date_close = index_close_period.iloc[-1, :]
        first_date_close = index_close_period.iloc[0, :]
        result.loc[:, "之后半年指数涨跌"] = (last_date_close / first_date_close - 1.0).values[0]
    except IndexError:
        result.loc[:, "之后半年指数涨跌"] = np.nan

    result.loc[:, "之后半年超额收益"] = (result.loc[:, "之后半年基金涨跌"]
                                  - result.loc[:, "之后半年指数涨跌"])
    result.loc[:, "之后半年信息比率"] = (result.loc[:, "之后半年超额收益"]
                                  / result.loc[:, "之后半年跟踪误差"])

    result = pd.concat([fund_info, result], axis=1)
    result = result.dropna(subset=["基金规模"])
    result = result.fillna("")

    # Export to Excel.
    out_path = os.path.join(out_path, "cal_fund_factor", index_name)
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', '之后半年数据长度'] = '0.00'
    num_format_pd.loc['format', '之后半年信息比率'] = '0.00'
    num_format_pd.loc['format', '基金规模'] = '0.00'
    num_format_pd.loc['format', '最近1年信息比率'] = '0.00'
    num_format_pd.loc['format', '最近1年数据长度'] = '0.00'

    begin_row_number = 0
    begin_col_number = 0
    color = "red"
    file_name = os.path.join(
        out_path, '基金指标_' + index_name + '_' + date_cur_int + '.xlsx')
    sheet_name = "基金指标"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 result, num_format_pd, color)
    return True
def filter_fund_pool(index_code, begin_date, end_date, min_period, ipo_date,
                     track_error_up, index_name, out_path):
    """
    Build the first-pass pool of funds tracking one index and export it.

    For every fund the count and the annualised (sqrt(250)) standard
    deviation of its return minus the index return are computed over
    ``begin_date`` .. ``end_date``. Funds are kept when they have more than
    ``min_period`` observations, tracking error below ``track_error_up``, a
    benchmark text containing ``index_name``, a setup date before
    ``ipo_date`` and a full name containing neither "交易型开放式指数" nor
    "联接". The table is written to
    ``<out_path>/filter_fund_pool/基金初次筛选池_<index_name>.xlsx``.

    Args:
        index_code: index code, e.g. '000300.SH'.
        begin_date: first date of the evaluation window, e.g. '2017-05-31'.
        end_date: last date of the evaluation window, e.g. '2018-05-31'.
        min_period: exclusive lower bound on the observation count, e.g. 200.
        ipo_date: funds set up on or after this date ('YYYY-MM-DD') are dropped.
        track_error_up: exclusive upper bound on tracking error.
        index_name: index name searched for in the benchmark text, e.g. '沪深300'.
        out_path: root output directory.
    """
    # Load data.
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE'])
    index_close.columns = [index_code]
    result = pd.DataFrame([], index=fund_nav.columns,
                          columns=['最近1年跟踪误差', '有效数据长度'])

    # Tracking error over the window.
    # reindex() reproduces the old .ix behaviour: index dates missing from
    # the NAV table become NaN rows instead of raising.
    fund_nav = fund_nav.reindex(index_close.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')
    fund_excess_pct_period = fund_excess_pct.loc[begin_date:end_date]
    result.loc[:, "有效数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "最近1年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Screen on data length and tracking error, then on fund info.
    result = result.dropna()
    result = result[result['有效数据长度'] > min_period]
    result = result[result['最近1年跟踪误差'] < track_error_up]

    code_str = ','.join(result.index)
    data = w.wss(code_str,
                 "fund_benchmark,fund_fullname,fund_setupdate,fund_investtype")
    data_pd = pd.DataFrame(data.Data, index=data.Fields, columns=data.Codes).T
    data_pd.columns = ['基金基准', '基金全称', '上市日期', '基金类型']
    # Format as 'YYYY-MM-DD' so the string comparison with ipo_date is valid.
    data_pd['上市日期'] = data_pd['上市日期'].map(lambda x: x.strftime('%Y-%m-%d'))

    result = pd.concat([data_pd, result], axis=1)
    result = result[result["基金基准"].map(lambda x: index_name in x)]
    result = result[result["上市日期"] < ipo_date]
    result = result[result["基金全称"].map(lambda x: "交易型开放式指数" not in x)]
    result = result[result["基金全称"].map(lambda x: "联接" not in x)]

    # Export.
    out_path = os.path.join(out_path, "filter_fund_pool")
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    file_name = os.path.join(out_path, '基金初次筛选池_' + index_name + '.xlsx')
    sheet_name = "基金筛选池"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 result, num_format_pd, color)
def _forward_exceed_ratio(values, window):
    """Share of each forward window whose values exceed the window's first value.

    For every position ``i`` with ``i < len(values) - window`` the window is
    ``values[i:i + window]`` (it starts at ``i`` itself, matching the original
    positional ``.ix[i:i + window]`` slice); the result is the count of
    entries strictly greater than ``values[i]`` divided by ``window``.
    Positions without a full window are NaN.
    """
    values = np.asarray(values, dtype=float)
    ratios = np.full(len(values), np.nan)
    for start in range(len(values) - window):
        ahead = values[start:start + window]
        ratios[start] = np.count_nonzero(ahead > values[start]) / float(window)
    return ratios


def _horizon_group_stats(data, horizon):
    """Per-PE-bucket statistics for one horizon suffix ('1y', '3y' or '5y').

    Only rows with a non-NaN ``return_<horizon>`` are used. Returns a tuple
    ``(count, median_return, positive_ratio, exceed_ratio_median)``, each
    indexed by the ``pe_cut`` bucket.
    """
    valid = data.dropna(subset=['return_' + horizon])
    by_pe = valid.groupby('pe_cut')

    return_count = by_pe['return_' + horizon].count()
    return_median = by_pe['return_' + horizon].median()
    positive_flags = by_pe['if_zero_' + horizon]
    positive_ratio = pd.DataFrame(positive_flags.sum() / positive_flags.count())
    exceed_ratio = by_pe['if_%s_ratio' % horizon].median()
    return return_count, return_median, positive_ratio, exceed_ratio


def Index_Group_Statistics(out_path, index_code, value_factor, group_number):
    """Bucket an index's history by valuation and summarise forward returns.

    The function loads the 'PCT' and ``value_factor`` columns for
    ``index_code``, derives forward 1/3/5-year log returns (242 rows per
    year), splits the valuation into ``group_number`` quantile buckets and,
    per bucket, reports: sample count, median forward return, share of
    positive forward returns, and the median share of the forward window in
    which the valuation was above its starting value.

    Two files are written, both named by plain string concatenation
    ``out_path + index_code + suffix`` (so ``out_path`` needs its trailing
    separator):
      * ``..._原始数据.csv``  -- the row-level working table
      * ``..._收益中位数.xlsx`` -- the per-bucket summary, via ``write_pandas``

    Args:
        out_path: Output path prefix.
        index_code: Index code, e.g. "000300.SH" (example from the original
            comments).
        value_factor: Name of the valuation column to load, e.g. 'PE_TTM'.
        group_number: Number of quantile buckets, e.g. 8.

    Returns:
        None.

    Notes:
        * Forward returns are sums of log returns over the following window
          (rows t+1 .. t+window), not simple returns.
        * The 1-year "exceeds initial PE" statistic now uses the median, as
          the 3- and 5-year ones already did and as the output column label
          (中位数) states; the original used the mean for 1y only.
        * NOTE(review): the forward-window logic reproduces the positional
          behaviour of the removed `.ix` accessor, which assumes the frame's
          index is not integer-typed -- confirm against the data source.
    """
    year_number = 242  # rows treated as one year
    horizons = (('1y', 1), ('3y', 3), ('5y', 5))

    # ---- 1. row-level working table -----------------------------------------
    # 1.1 daily return and valuation
    data = Index().get_index_factor(index_code, None, None, ['PCT', value_factor])
    data = data.dropna(subset=[value_factor])
    data.columns = ['pct', 'pe']
    data['pe'] = data['pe'].round(2)

    # 1.2 daily log return and cumulative simple return
    data['ln_pct'] = np.log(data['pct'] + 1)
    data['cum_sum_pct'] = np.exp(data['ln_pct'].cumsum()) - 1

    # 1.3 forward log return over the following 1/3/5 years
    for suffix, years in horizons:
        window = year_number * years
        data['return_' + suffix] = (
            data['ln_pct'].rolling(window=window).sum().shift(-window))

    # 1.4 whether each forward return is positive (NaN compares as False;
    #     such rows are dropped before grouping)
    for suffix, _ in horizons:
        data['if_zero_' + suffix] = data['return_' + suffix] > 0.0

    # 1.5 percentile (0-100, rounded) of the valuation within the full sample
    data['rank'] = data['pe'].rank() / len(data)
    data['rank'] *= 100
    data['rank'] = data['rank'].round(0)

    # 1.6 share of the forward window in which the valuation was above its
    #     starting value
    pe_values = data['pe'].values
    for suffix, years in horizons:
        data['if_%s_ratio' % suffix] = _forward_exceed_ratio(
            pe_values, year_number * years)

    # 1.7 quantile bucket of the valuation
    data['pe_cut'] = pd.qcut(data['pe'], group_number)
    data.to_csv(out_path + index_code + '_原始数据.csv')

    # ---- 2. per-bucket statistics --------------------------------------------
    (return_1y_count, return_1y_median,
     zero_ratio_1y, if_pe_1y_ratio) = _horizon_group_stats(data, '1y')
    (return_3y_count, return_3y_median,
     zero_ratio_3y, if_pe_3y_ratio) = _horizon_group_stats(data, '3y')
    (return_5y_count, return_5y_median,
     zero_ratio_5y, if_pe_5y_ratio) = _horizon_group_stats(data, '5y')

    # Percentile-range label for each bucket: only the group keys of the
    # percentile buckets are used, paired positionally with the PE buckets.
    valid_1y = data.dropna(subset=['return_1y']).copy()
    valid_1y['pe_rank_cut'] = pd.qcut(data['rank'], group_number)
    rank_buckets = valid_1y.groupby('pe_rank_cut')['return_1y'].median().index
    # np.asarray replaces the removed Categorical.to_dense().
    return_pe_rank = pd.DataFrame(
        np.asarray(rank_buckets),
        index=np.asarray(return_1y_median.index),
        columns=['历史百分位数'])
    return_pe_rank['开始时间'] = data.index[0]
    return_pe_rank['结束时间'] = data.index[-1]
    return_pe_rank['当前PE'] = data['pe'].iloc[-1]

    # ---- 3. output -------------------------------------------------------------
    res = pd.concat([
        return_pe_rank,
        return_1y_count, return_3y_count, return_5y_count,
        return_1y_median, return_3y_median, return_5y_median,
        zero_ratio_1y, zero_ratio_3y, zero_ratio_5y,
        if_pe_1y_ratio, if_pe_3y_ratio, if_pe_5y_ratio,
    ], axis=1)
    res.columns = [
        "PE百分位范围", '开始时间', '结束时间', '当前PE',
        '有效数据个数_1y', '有效数据个数_3y', '有效数据个数_5y',
        '收益中位数_1y', '收益中位数_3y', '收益中位数_5y',
        '收益大于0的比例_1y', '收益大于0的比例_3y', '收益大于0的比例_5y',
        '超过初始PE天数的比例的中位数_1y', '超过初始PE天数的比例的中位数_3y',
        '超过初始PE天数的比例的中位数_5y',
    ]
    # Replace the (possibly categorical) index by its plain values.
    res.index = np.asarray(res.index)
    res.index.name = "PE绝对值范围"

    num_format_pd = pd.DataFrame([], columns=res.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', [
        '开始时间', '结束时间', '当前PE',
        '有效数据个数_1y', '有效数据个数_3y', '有效数据个数_5y',
    ]] = '0.0'

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    file_name = out_path + index_code + '_收益中位数.xlsx'
    sheet_name = "收益中位数"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 res, num_format_pd, color)