def test_all(): index_code = '000016.SH' fixed_pct_yearly = 0.04 target_nav = 1.08 date_data = Date().get_trade_date_series("20071231", datetime.today(), "M") for i in range(len(date_data) - 12): init_date = date_data.index[i] end_date = date_data.index[i + 12] print(init_date, end_date) res = index_position(index_code, fixed_pct_yearly, target_nav, init_date, end_date) if i == 0: result = res else: result = pd.concat([result, res], axis=0) ############################################################################################ num_format_pd = pd.DataFrame([], columns=result.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' save_path = 'C:\\Users\\doufucheng\\OneDrive\\Desktop\\data\\' begin_row_number = 0 begin_col_number = 1 color = "red" file_name = save_path + index_code + ".xlsx" sheet_name = "指数测试" result.to_csv(save_path + index_code + ".csv") write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color) print(result)
def fund_score_date(end_date, halfyear_date, index_name, index_code, path): # 参数 ################################################################################################################## # report_date = "2017-12-31" # report_date_last = "2017-06-30" # index_name = '中证500' # path = 'E:\\4_代码\\pycharmprojects\\31_雪球优选增强基金\\output_data\\' # 读取数据 基金指标 和 风格暴露 ################################################################################################################## sub_path = path + 'cal_fund_factor\\' + index_name + '\\' filename = sub_path + '基金指标_' + index_name + '_' + end_date + '.xlsx' fund_factor = pd.read_excel(filename, index_col=[0], encoding='gbk') # 超额收益小于0的不参与基金打分 # fund_factor = fund_factor[fund_factor['最近1年超额收益'] > -1.0] sub_path = path + 'exposure\\' + index_name + '\\' filename = sub_path + 'BARRA风格暴露_' + index_name + '_' + halfyear_date + '.xlsx' exposure = pd.read_excel(filename, index_col=[0], encoding='gbk') exposure = exposure - exposure.ix[index_code, :] exposure = exposure.abs() exp_col = ['贝塔', '账面市值比', '盈利', '成长', '杠杆', '流动性', '动量', '残差波动率', '市值', '非线性市值'] exposure.columns = exp_col exposure = exposure.round(2) # 计算跟踪误差得分 ################################################################################################################## result = pd.DataFrame([], index=fund_factor.index) result.ix[:, '基金名称'] = fund_factor.ix[:, '基金简称'] result.ix[:, '上期超额收益'] = fund_factor.ix[:, '最近1年超额收益'] result.ix[:, '下期超额收益'] = fund_factor.ix[:, '之后半年超额收益'] if index_name == '中证500': val_25 = max(0.03, fund_factor.ix[:, '最近1年跟踪误差'].quantile(0.20)) val_75 = min(0.08, fund_factor.ix[:, '最近1年跟踪误差'].quantile(0.80)) else: val_25 = max(0.02, fund_factor.ix[:, '最近1年跟踪误差'].quantile(0.20)) val_75 = min(0.05, fund_factor.ix[:, '最近1年跟踪误差'].quantile(0.80)) result.ix[:, '跟踪误差'] = fund_factor.ix[:, '最近1年跟踪误差'] result.ix[:, '跟踪误差得分'] = fund_factor.ix[:, '最近1年跟踪误差'].map(lambda x: score_quarter_reverse(x, val_25, val_75)).round(2) # 计算超额收益得分 ################################################################################################################## if index_name == '中证500': val_25 = max(0.00, fund_factor.ix[:, '最近1年超额收益'].quantile(0.20)) val_75 = min(0.15, fund_factor.ix[:, '最近1年超额收益'].quantile(0.80)) else: val_25 = max(0.00, fund_factor.ix[:, '最近1年超额收益'].quantile(0.20)) val_75 = min(0.10, fund_factor.ix[:, '最近1年超额收益'].quantile(0.80)) result.ix[:, '超额收益'] = fund_factor.ix[:, '最近1年超额收益'] result.ix[:, '超额收益得分'] = fund_factor.ix[:, '最近1年超额收益'].map(lambda x: score_quarter(x, val_25, val_75)).round(2) # 计算信息比率得分 ################################################################################################################## val_25 = fund_factor.ix[:, '最近1年信息比率'].quantile(0.20) val_75 = fund_factor.ix[:, '最近1年信息比率'].quantile(0.80) result.ix[:, '信息比率'] = fund_factor.ix[:, '最近1年信息比率'].round(2) result.ix[:, '信息比率得分'] = fund_factor.ix[:, '最近1年信息比率'].map(lambda x: score_quarter(x, val_25, val_75)).round(2) # 计算风格偏露得分 ################################################################################################################## result = pd.concat([result, exposure.iloc[:, 0:10]], axis=1) result = result.dropna(subset=["基金名称"]) result.ix[:, '风格暴露得分'] = 0.0 for i_col in range(10): col = exposure.columns[i_col] result.ix[:, '风格暴露得分'] += result.ix[:, col].map(score_ex) result.ix[:, '风格暴露得分'] = result.ix[:, '风格暴露得分'] * 0.5 val_25 = result.ix[:, '风格暴露得分'].quantile(0.20) val_75 = result.ix[:, '风格暴露得分'].quantile(0.80) result.ix[:, '风格暴露得分'] = result.ix[:, '风格暴露得分'].map(lambda x: score_quarter(x, val_25, val_75)).round(2) # 计算总得分 ################################################################################################################## if index_name == "中证500": result.ix[:, '总得分'] = result['跟踪误差得分'] * 0.10 + result['超额收益得分'] * 0.40 + \ result['信息比率得分'] * 0.20 + result['风格暴露得分'] * 0.30 else: # result.ix[:, '总得分'] = result['跟踪误差得分'] * 0.05 + result['超额收益得分'] * 0.50 + \ # result['信息比率得分'] * 0.30 + result['风格暴露得分'] * 0.15 result.ix[:, '总得分'] = result['跟踪误差得分'] * 0.10 + result['超额收益得分'] * 0.40 + \ result['信息比率得分'] * 0.20 + result['风格暴露得分'] * 0.30 result = result.sort_values(by=['总得分'], ascending=False) col = ["基金名称", "下期超额收益", '总得分', '跟踪误差', '跟踪误差得分', '超额收益', '超额收益得分', '信息比率', '信息比率得分', '风格暴露得分', '上期超额收益'] col.extend(exp_col[0:10]) result = result[col] # 写到EXCEL表 ################################################################################################################## num_format_pd = pd.DataFrame([], columns=result.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' num_format_pd.ix['format', ['跟踪误差', '超额收益', '下期超额收益', '上期超额收益']] = '0.00%' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = path + 'score_fund\\' + index_name + '\\' + '基金得分_' + index_name + '_' + end_date + '.xlsx' sheet_name = "基金得分" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color)
def nav(index_code, index_name, now_date): # 参数 ################################################################################################################## # index_name = '中证500' # index_code = '000905.SH' # now_date = "20180715" # 日期 ################################################################################################################## date_series = Date().get_normal_date_series("20130622", datetime.today(), period="S") date_series.append(now_date) # 循环计算基金净值 ################################################################################################################## path = 'E:\\4_代码\\pycharmprojects\\31_雪球优选增强基金\\output_data\\' file_name = path + 'select_fund\\' + index_name + '\\' + '基金每期持仓_' + index_name + '.xlsx' fund_holding_total = pd.read_excel(file_name, index_col=[0], encoding='gbk') fund_holding_total['换仓日期'] = fund_holding_total['换仓日期'].map(str) nav_total = pd.DataFrame([]) for i_date in range(0, len(date_series) - 2): report_date_str = date_series[i_date] report_date_str_after = date_series[i_date + 1] print(report_date_str, report_date_str_after) fund_holding_date = fund_holding_total[fund_holding_total['换仓日期'] == report_date_str] fund_code_list = list(fund_holding_date['基金代码'].values) fund_data = Fund().get_fund_factor("Repair_Nav", None, None) fund_data = fund_data.ix[report_date_str:report_date_str_after, fund_code_list] fund_data = fund_data / fund_data.iloc[0, :] fund_data = fund_data.dropna() nav_array = fund_data.values weight_array = np.row_stack(fund_holding_date.ix[:, '权重'].values) fund_data.loc[:, '每日总资产'] = np.dot(nav_array, weight_array) fund_data['每日涨跌幅'] = fund_data['每日总资产'].pct_change() fund_data = fund_data.dropna() nav_total = pd.concat([nav_total, fund_data['每日涨跌幅']], axis=0) nav_total = nav_total[~nav_total.index.duplicated()] index_pct = Index().get_index_factor(index_code, None, None, ["PCT"]) result = pd.concat([nav_total, index_pct], axis=1) result = result.dropna() result.columns = ['基金组合日涨跌', '指数日涨跌'] result['基金组合累计收益'] = (result['基金组合日涨跌'] + 1).cumprod() - 1 result['指数累计收益'] = (result['指数日涨跌'] + 1).cumprod() - 1 result['超额累计收益'] = result['基金组合累计收益'] - result['指数累计收益'] num_format_pd = pd.DataFrame([], columns=result.columns, index=['format']) num_format_pd.ix['format', :] = '0.00%' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = path + 'nav\\' + index_name + '\\' + '基金回测净值_' + index_name + '.xlsx' sheet_name = "基金回测净值" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color)
def backtest(index_name, now_date, path): # 参数 ################################################################################################################## # index_name = '中证500' # now_date = "20180715" # path = 'E:\\3_数据\\4_fund_data\\6_index_enhanced_fund_snowball\\' # 日期 ################################################################################################################## date_series = Date().get_normal_date_series("20130622", datetime.today(), period="S") # date_series.remove("20180630") date_series.append(now_date) # 循环取出持仓基金 ################################################################################################################## out_path = path + 'score_fund\\' + index_name + '\\' holding_fund_total = pd.DataFrame([]) holding_fund_pct = pd.DataFrame([], index=date_series, columns=['下期超额收益']) for i_date in range(0, len(date_series)): report_date = date_series[i_date] print(report_date) filename = out_path + '基金得分_' + index_name + '_' + report_date + '.xlsx' fund_score = pd.read_excel(filename, index_col=[0], encoding='gbk') fund_score = fund_score[[ '基金名称', '下期超额收益', '总得分', '跟踪误差得分', '超额收益得分', '信息比率得分', '风格暴露得分', '上期超额收益' ]] fund_score['基金公司'] = fund_score['基金名称'].map(lambda x: x[0:2]) fund_score['基金代码'] = fund_score.index fund_score_drop = fund_score.groupby( ['基金公司']).apply(lambda i: i.iloc[0, :] if len(i) >= 1 else np.nan) fund_score_drop = fund_score_drop.sort_values(by=['总得分'], ascending=False) fund_score_drop_0 = fund_score_drop[fund_score_drop['上期超额收益'] > 0.0] if len(fund_score_drop_0) >= 5: fund_score_drop = fund_score_drop_0 fund_score_five = fund_score_drop.iloc[0:min(5, len(fund_score)), :] if len(fund_score_five) > 0.0: fund_score_five['权重'] = 1.0 / len(fund_score_five) fund_score_five['换仓日期'] = report_date holding_fund_total = pd.concat( [holding_fund_total, fund_score_five], axis=0) pct = (fund_score_five['权重'] * fund_score_five['下期超额收益']).sum() holding_fund_pct.ix[report_date, "下期超额收益"] = pct # 循环取出持仓基金 ################################################################################################################## num_format_pd = pd.DataFrame([], columns=holding_fund_total.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' num_format_pd.ix['format', ['下期超额收益', '上期超额收益']] = '0.00%' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = path + 'select_fund\\' + index_name + '\\' + '基金每期持仓_' + index_name + '.xlsx' sheet_name = "基金每期持仓" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, holding_fund_total, num_format_pd, color) num_format_pd = pd.DataFrame([], columns=holding_fund_pct.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' num_format_pd.ix['format', '下期超额收益'] = '0.00%' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = path + 'select_fund\\' + index_name + '\\' + '基金每期超额收益_' + index_name + '.xlsx' sheet_name = "基金每期超额收益" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, holding_fund_pct, num_format_pd, color)
def calculate_fund_factor_date(date, index_code, index_name, out_path): # 参数 ######################################################################################################## # index_code = '000905.SH' # index_name = '中证500' # date = '2015-12-31' # min_period = 200 # out_path = 'E:\\4_代码\\pycharmprojects\\31_雪球优选增强基金\\output_data\\cal_fund_factor\\zz500\\' # 日期数据 ######################################################################################################## min_period = 200 date_cur = datetime.strptime(date, "%Y%m%d") date_cur_int = date_cur.strftime('%Y%m%d') date_bef_1y = datetime(year=date_cur.year-1, month=date_cur.month, day=date_cur.day).strftime("%Y-%m-%d") date_aft_hy = (date_cur + pd.tseries.offsets.DateOffset(months=6, days=0)).strftime("%Y-%m-%d") # 读取数据 基金池 基金净值数据 指数收盘价数据 ######################################################################################################## path = os.path.join(out_path, 'filter_fund_pool\\') file = os.path.join(path, '基金最终筛选池_' + index_name + '.xlsx') fund_code = pd.read_excel(file, index_col=[1], encoding='gbk') fund_nav = Fund().get_fund_factor("Repair_Nav") index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE']) index_close.columns = [index_code] # 筛选新基金 并下载基金规模 ####################################################################################################### fund_code = fund_code.ix[:, ['上市日期', '基金简称']] fund_code = fund_code[fund_code['上市日期'] < date_bef_1y] fund_code_str = ','.join(fund_code.index) fund_asset = w.wss(fund_code_str, "netasset_total", "unit=1;tradeDate=" + str(date)) fund_asset = pd.DataFrame(fund_asset.Data, index=['基金规模'], columns=fund_asset.Codes).T fund_asset['基金规模'] /= 100000000.0 fund_asset['基金规模'] = fund_asset['基金规模'].round(2) fund_asset = fund_asset[fund_asset['基金规模'] > 0.5] fund_info = pd.concat([fund_code, fund_asset], axis=1) fund_info = fund_info.dropna() # 计算最近1年 各项指标 ######################################################################################################## result = pd.DataFrame([], index=fund_code.index, columns=['最近1年跟踪误差']) fund_nav = fund_nav.ix[index_close.index, fund_code.index] fund_pct = fund_nav.pct_change() index_pct = index_close.pct_change() index_pct = index_pct[index_code] fund_excess_pct = fund_pct.sub(index_pct, axis='index') fund_excess_pct_period = fund_excess_pct.ix[date_bef_1y:date, :] fund_nav_period = fund_nav.ix[date_bef_1y:date, :] index_close_prioed = index_close.ix[date_bef_1y:date, :] result.ix[:, "最近1年数据长度"] = fund_excess_pct_period.count() result.ix[:, "最近1年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250) # last_date_nav = fund_nav_period.iloc[len(fund_nav_period)-1, :] # first_date_nav = fund_nav_period.iloc[0, :] fund_return_log =(fund_nav_period.pct_change()+1.0).applymap(np.log).cumsum().ix[-1,:] fund_return = fund_return_log.map(np.exp) - 1 last_date_close = index_close_prioed.iloc[len(fund_nav_period)-1, :] first_date_close = index_close_prioed.iloc[0, :] result.ix[:, "最近1年基金涨跌"] = fund_return result.ix[:, "最近1年指数涨跌"] = (last_date_close / first_date_close - 1.0).values[0] result.ix[:, "最近1年超额收益"] = result.ix[:, "最近1年基金涨跌"] - result.ix[:, "最近1年指数涨跌"] result.ix[:, "最近1年信息比率"] = result.ix[:, "最近1年超额收益"] / result.ix[:, "最近1年跟踪误差"] result = result[result['最近1年数据长度'] > min_period] # 计算之后半年 各项指标 ######################################################################################################## fund_excess_pct_period = fund_excess_pct.ix[date:date_aft_hy, :] fund_nav_period = fund_nav.ix[date:date_aft_hy, :] index_close_prioed = index_close.ix[date:date_aft_hy, :] result.ix[:, "之后半年数据长度"] = fund_excess_pct_period.count() result.ix[:, "之后半年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250) try: fund_return_log = (fund_nav_period.pct_change() + 1.0).applymap(np.log).cumsum().ix[-1, :] fund_return = fund_return_log.map(np.exp) - 1 result.ix[:, "之后半年基金涨跌"] = fund_return except: result.ix[:, "之后半年基金涨跌"] = np.nan try: last_date_close = index_close_prioed.iloc[len(fund_nav_period) - 1, :] first_date_close = index_close_prioed.iloc[0, :] result.ix[:, "之后半年指数涨跌"] = (last_date_close / first_date_close - 1.0).values[0] except: result.ix[:, "之后半年指数涨跌"] = np.nan result.ix[:, "之后半年超额收益"] = result.ix[:, "之后半年基金涨跌"] - result.ix[:, "之后半年指数涨跌"] result.ix[:, "之后半年信息比率"] = result.ix[:, "之后半年超额收益"] / result.ix[:, "之后半年跟踪误差"] result = pd.concat([fund_info, result], axis=1) result = result.dropna(subset=["基金规模"]) result = result.fillna("") # 写到EXCEL表 ################################################################################################ out_path = os.path.join(out_path, "cal_fund_factor\\" + index_name) num_format_pd = pd.DataFrame([], columns=result.columns, index=['format']) num_format_pd.ix['format', :] = '0.00%' num_format_pd.ix['format', '之后半年数据长度'] = '0.00' num_format_pd.ix['format', '之后半年信息比率'] = '0.00' num_format_pd.ix['format', '基金规模'] = '0.00' num_format_pd.ix['format', '最近1年信息比率'] = '0.00' num_format_pd.ix['format', '最近1年数据长度'] = '0.00' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = os.path.join(out_path, '基金指标_' + index_name + '_' + date_cur_int + '.xlsx') sheet_name = "基金指标" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color) ################################################################################################################ return True
def filter_fund_pool(index_code, begin_date, end_date, min_period, ipo_date, track_error_up, index_name, out_path): ############################################################################################# # begin_date = '2017-05-31' # end_date = '2018-05-31' # ipo_date = '2017-05-31' # min_period = 200 # index_name = '沪深300' # index_code = '000300.SH' # 读取数据 ############################################################################################# fund_nav = Fund().get_fund_factor("Repair_Nav") index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE']) index_close.columns = [index_code] result = pd.DataFrame([], index=fund_nav.columns, columns=['最近1年跟踪误差', '有效数据长度']) # 计算最近1年跟踪误差数据 ############################################################################################# fund_nav = fund_nav.ix[index_close.index, :] fund_pct = fund_nav.pct_change() index_pct = index_close.pct_change() index_pct = index_pct[index_code] fund_excess_pct = fund_pct.sub(index_pct, axis='index') fund_excess_pct_period = fund_excess_pct.ix[begin_date: end_date] result.ix[:, "有效数据长度"] = fund_excess_pct_period.count() result.ix[:, "最近1年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250) # 筛选 ############################################################################################# result = result.dropna() result = result[result['有效数据长度'] > min_period] result = result[result['最近1年跟踪误差'] < track_error_up] code_str = ','.join(result.index) data = w.wss(code_str, "fund_benchmark,fund_fullname,fund_setupdate,fund_investtype") data_pd = pd.DataFrame(data.Data, index=data.Fields, columns=data.Codes).T data_pd.columns = ['基金基准', '基金全称', '上市日期', '基金类型'] data_pd['上市日期'] = data_pd['上市日期'].map(lambda x: x.strftime('%Y-%m-%d')) result = pd.concat([data_pd, result], axis=1) result = result[result["基金基准"].map(lambda x: index_name in x)] result = result[result["上市日期"] < ipo_date] result = result[result["基金全称"].map(lambda x: "交易型开放式指数" not in x)] result = result[result["基金全称"].map(lambda x: "联接" not in x)] # 输出结果 ############################################################################################ out_path = os.path.join(out_path, "filter_fund_pool") num_format_pd = pd.DataFrame([], columns=result.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' begin_row_number = 0 begin_col_number = 1 color = "red" file_name = os.path.join(out_path, '基金初次筛选池_' + index_name + '.xlsx') sheet_name = "基金筛选池" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color)
def Index_Group_Statistics(out_path, index_code, value_factor, group_number): # 0、输入参数 ############################################################################## # out_path = 'C:\\Users\\doufucheng\\OneDrive\\Desktop\\data\\' # index_code = "000300.SH" # value_factor = 'PE_TTM' # group_number = 8 # 1、原始数据整理 ############################################################################## # 1、1: 读入指数收益率数据和pe ################################### data = Index().get_index_factor(index_code, None, None, ['PCT', value_factor]) data = data.dropna(subset=[value_factor]) data.columns = ['pct', 'pe'] data['pe'] = data['pe'].round(2) year_number = 242 # 1、2: 指数 日收益 累计收益 ################################### data['ln_pct'] = np.log(data['pct'] + 1) data['cum_sum_pct'] = data['ln_pct'].cumsum().map(lambda x: np.exp(x) - 1) # 1、3: 之后1、3、5年的收益 ################################### data['return_1y'] = data['ln_pct'].rolling( window=year_number).sum().shift(-year_number) data['return_3y'] = data['ln_pct'].rolling(window=year_number * 3).sum().shift(-year_number * 3) data['return_5y'] = data['ln_pct'].rolling(window=year_number * 5).sum().shift(-year_number * 5) # 1、4: 之后1、3、5年的收益是否大于0 ################################### data['if_zero_1y'] = data['return_1y'] > 0.0 data['if_zero_3y'] = data['return_3y'] > 0.0 data['if_zero_5y'] = data['return_5y'] > 0.0 # 1、5: 在全局的pe百分比 ################################### data['rank'] = data['pe'].rank() / len(data) data['rank'] *= 100 data['rank'] = data['rank'].round(0) # 1、7: 之后1年超过初始PE的时间 ################################### for i in range(0, len(data) - year_number): init_pe = data.ix[i, "pe"] data_bigger = data.ix[i:i + year_number, 'pe'] > init_pe data_bigger = data_bigger[data_bigger] ratio = len(data_bigger) / year_number data.ix[i, 'if_1y_ratio'] = ratio # 1、8: 之后3年超过初始PE的时间 ################################### for i in range(0, len(data) - year_number * 3): init_pe = data.ix[i, "pe"] data_bigger = data.ix[i:i + year_number * 3, 'pe'] > init_pe data_bigger = data_bigger[data_bigger] ratio = len(data_bigger) / (year_number * 3) data.ix[i, 'if_3y_ratio'] = ratio # 1、9: 之后5年超过初始PE的时间 ################################### for i in range(0, len(data) - year_number * 5): init_pe = data.ix[i, "pe"] data_bigger = data.ix[i:i + year_number * 5, 'pe'] > init_pe data_bigger = data_bigger[data_bigger] ratio = len(data_bigger) / (year_number * 5) data.ix[i, 'if_5y_ratio'] = ratio data['pe_cut'] = pd.qcut(data['pe'], group_number) data.to_csv(out_path + index_code + '_原始数据.csv') # 2、分组统计 ############################################################################## # 2、1: 1年后 收益中位数、pe超过初始PE的百分比、有效数字的个数 ######################################################## my_data = data.dropna(subset=['return_1y']) my_data['pe_cut'] = pd.qcut(data['pe'], group_number) if_pe_1y_ratio = my_data.groupby(by=['pe_cut'])['if_1y_ratio'].mean() return_1y_median = my_data.groupby(by=['pe_cut'])['return_1y'].median() return_1y_count = my_data.groupby(by=['pe_cut'])['return_1y'].count() my_data['pe_rank_cut'] = pd.qcut(data['rank'], group_number) return_pe_rank = my_data.groupby(by=['pe_rank_cut'])['return_1y'].median() return_pe_rank = pd.DataFrame( return_pe_rank.index.values.to_dense(), index=return_1y_median.index.values.to_dense(), columns=['历史百分位数']) return_pe_rank['开始时间'] = data.index[0] return_pe_rank['结束时间'] = data.index[-1] return_pe_rank['当前PE'] = data.ix[-1, 'pe'] # 1年后 收益大于0的百分比 if_zero_number = my_data.groupby(by=['pe_cut'])['if_zero_1y'].sum() sum_number = my_data.groupby(by=['pe_cut'])['if_zero_1y'].count() zero_ratio_1y = pd.DataFrame(if_zero_number / sum_number) # 2、1: 3年后 收益中位数、pe超过初始PE的百分比、有效数字的个数 ####################################################### my_data = data.dropna(subset=['return_3y']) my_data['pe_cut'] = pd.qcut(data['pe'], group_number) if_pe_3y_ratio = my_data.groupby(by=['pe_cut'])['if_3y_ratio'].median() return_3y_median = my_data.groupby(by=['pe_cut'])['return_3y'].median() return_3y_count = my_data.groupby(by=['pe_cut'])['return_3y'].count() if_zero_number = my_data.groupby(by=['pe_cut'])['if_zero_3y'].sum() sum_number = my_data.groupby(by=['pe_cut'])['if_zero_3y'].count() zero_ratio_3y = pd.DataFrame(if_zero_number / sum_number) # 2、3: 5年后 收益中位数、pe超过初始PE的百分比、有效数字的个数 ####################################################### my_data = data.dropna(subset=['return_5y']) my_data['pe_cut'] = pd.qcut(data['pe'], group_number) if_pe_5y_ratio = my_data.groupby(by=['pe_cut'])['if_5y_ratio'].median() return_5y_median = my_data.groupby(by=['pe_cut'])['return_5y'].median() return_5y_count = my_data.groupby(by=['pe_cut'])['return_5y'].count() if_zero_number = my_data.groupby(by=['pe_cut'])['if_zero_5y'].sum() sum_number = my_data.groupby(by=['pe_cut'])['if_zero_5y'].count() zero_ratio_5y = pd.DataFrame(if_zero_number / sum_number) # 数据输出 ############################################################################## res = pd.concat([ return_pe_rank, return_1y_count, return_3y_count, return_5y_count, return_1y_median, return_3y_median, return_5y_median, zero_ratio_1y, zero_ratio_3y, zero_ratio_5y, if_pe_1y_ratio, if_pe_3y_ratio, if_pe_5y_ratio ], axis=1) res.index.name = "PE绝对值范围" res.columns = [ "PE百分位范围", '开始时间', '结束时间', '当前PE', '有效数据个数_1y', '有效数据个数_3y', '有效数据个数_5y', '收益中位数_1y', '收益中位数_3y', '收益中位数_5y', '收益大于0的比例_1y', '收益大于0的比例_3y', '收益大于0的比例_5y', '超过初始PE天数的比例的中位数_1y', '超过初始PE天数的比例的中位数_3y', '超过初始PE天数的比例的中位数_5y' ] res.index = res.index.values.to_dense() res.index.name = "PE绝对值范围" num_format_pd = pd.DataFrame([], columns=res.columns, index=['format']) num_format_pd.ix['format', :] = '0.00%' num_format_pd.ix[ 'format', ['开始时间', '结束时间', '当前PE', '有效数据个数_1y', '有效数据个数_3y', '有效数据个数_5y' ]] = '0.0' begin_row_number = 0 begin_col_number = 1 color = "red" file_name = out_path + index_code + '_收益中位数.xlsx' sheet_name = "收益中位数" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, res, num_format_pd, color)
def index_position(index_code, fixed_pct_yearly, target_nav, init_date, end_date): # 参数 #################################################################################################### # index_code = '000905.SH' # fixed_pct_yearly = 0.03 # init_date = datetime(2013, 1, 1) # end_date = datetime(2014, 6, 29) # target_nav = 1.06 init_nav = 1.00 init_target_pct = (target_nav / init_nav) - 1.0 expect_roe = 0.10 year_days = 250 max_stock_position = 0.80 max_expect_port_pct = 0.25 max_stock_pct = 0.30 min_stock_position = 0.00 # 计算 #################################################################################################### # # 得到数据 init_date = Date().get_trade_date_offset(init_date, -1) index_pct = Index(index_code).get_index_pct_period(init_date, end_date) pb = Index(index_code).get_index_attr_period(init_date, end_date, "PB_LF") data = pd.concat([index_pct, pb], axis=1) data.index.name = 'Date_Int' data.columns = ['Index_Pct', 'PB_LF'] data['Cum_Index_Pct'] = (data['Index_Pct'] + 1).cumprod() - 1.0 # init_pe = data.ix[0, "PE_TTM"] # 历史平均值 trade_days = len(data) before_date = Date().get_trade_date_offset(init_date, -trade_days) pb = Index(index_code).get_index_attr_period(before_date, init_date, "PB_LF") init_pe = pb.median() init_pe = data.ix[0, 'PB_LF'] expect_pe_change = init_pe / data.ix[0, 'PB_LF'] init_expect_stock_pct = expect_roe * trade_days / year_days + expect_pe_change - 1.0 fixed_pct = fixed_pct_yearly * trade_days / year_days # # 预期净值 预期股票收益 预期固定收益 data['Date_Number'] = range(1, trade_days + 1) data['Remain_Days'] = trade_days - data["Date_Number"] data['Cur_Target_Nav'] = target_nav / ( 1 + (data["Remain_Days"] / year_days) * fixed_pct_yearly) data['ROE'] = expect_roe data['Stock_Expected_ROE_Return'] = data.ROE * data.Remain_Days / year_days data['Init_Expected_PB'] = expect_pe_change * init_pe data['Cur_Expected_PB'] = (data.Init_Expected_PB * data.Remain_Days + data.PB_LF * data.Date_Number) / trade_days data['Stock_Expected_PB_Return'] = data.Cur_Expected_PB / data.PB_LF - 1.0 data['Stock_Expected_Return'] = (data['Stock_Expected_ROE_Return'] + 1.0) * (data['Stock_Expected_PB_Return'] + 1.0) - 1 data['Stock_Expected_Return'] = data['Stock_Expected_Return'].map( lambda x: min(x, max_stock_pct)) data['Fixed_Expected_Return'] = fixed_pct * data.Remain_Days / trade_days data['Fixed_Pct'] = fixed_pct / trade_days res = pd.DataFrame([], columns=['finish_nav']) res.ix[init_date, 'end_date'] = end_date flag = None number = 0 data['Exp_Stock_Position'] = np.nan # # 每天计算 for i_date in range(len(data)): if i_date == 0: cur_date = data.index[i_date] print(" Init ", cur_date) init_stock_position = (init_target_pct - fixed_pct) / ( init_expect_stock_pct - fixed_pct) init_stock_position = min(init_stock_position, max_stock_position) data.ix[i_date, 'Cur_Close_Stock_Position'] = init_stock_position data.ix[i_date, 'Cur_Close_Fixed_Position'] = 1.0 - init_stock_position data.ix[i_date, "Portfolio_Nav"] = init_nav data.ix[i_date, "Portfolio_Pct"] = 0.0 data.ix[i_date, "Change"] = "Change" number += 1 else: cur_date = data.index[i_date] before_nav = data.ix[i_date - 1, "Portfolio_Nav"] cur_target_nav = data.ix[i_date, "Cur_Target_Nav"] before_stock_position = data.ix[i_date - 1, 'Cur_Close_Stock_Position'] before_fixed_position = data.ix[i_date - 1, 'Cur_Close_Fixed_Position'] today_stock_pct = data.ix[i_date, 'Index_Pct'] today_fixed_pct = data.ix[i_date, 'Fixed_Pct'] today_stock_nav = before_stock_position * (today_stock_pct + 1.0) today_fixed_nav = before_fixed_position * (today_fixed_pct + 1.0) cur_stock_position = today_stock_nav / (today_stock_nav + today_fixed_nav) cur_fixed_position = today_fixed_nav / (today_stock_nav + today_fixed_nav) cur_port_pct = today_stock_pct * before_stock_position + today_fixed_pct * before_fixed_position cur_port_nav = before_nav * (1 + cur_port_pct) data.ix[i_date, 'Cur_Close_Stock_Position'] = cur_stock_position data.ix[i_date, 'Cur_Close_Fixed_Position'] = cur_fixed_position data.ix[i_date, "Portfolio_Pct"] = cur_port_pct data.ix[i_date, "Portfolio_Nav"] = cur_port_nav if cur_port_nav >= cur_target_nav: print(" Target Finish ", cur_date) data.ix[i_date, "Cur_Close_Stock_Position"] = 0.0 data.ix[i_date, "Cur_Close_Fixed_Position"] = 1.0 if flag is None: res.ix[init_date, 'finish_date'] = cur_date data.ix[i_date, "Change"] = "Change" number += 1 flag = 'f' else: # remain_days = data.ix[i_date, "Remain_Days"] # remain_ratio = remain_days / trade_days # expect_port_pct = (target_nav * remain_ratio + cur_port_nav * (1- remain_ratio)) / cur_port_nav - 1.0 expect_port_pct = target_nav / cur_port_nav - 1.0 expect_port_pct = min(expect_port_pct, max_expect_port_pct) data.ix[i_date, 'Exp_Portfolio_Pct'] = expect_port_pct expect_stock_pct = data.ix[i_date, 'Stock_Expected_Return'] expect_fixed_pct = data.ix[i_date, 'Fixed_Expected_Return'] if (expect_port_pct > expect_fixed_pct) and (expect_port_pct < expect_stock_pct): expect_stock_position = (expect_port_pct - expect_fixed_pct ) / (expect_stock_pct - expect_fixed_pct) expect_stock_position = np.min( [expect_stock_position, max_stock_position]) expect_stock_position = np.max( [expect_stock_position, min_stock_position]) elif expect_stock_pct < expect_fixed_pct: data.ix[i_date, "Cur_Close_Stock_Position"] = 0.0 data.ix[i_date, "Cur_Close_Fixed_Position"] = 1.0 expect_stock_position = 0.0 else: expect_stock_position = cur_stock_position # expect_stock_position = cur_stock_position stock_diff = np.abs(cur_stock_position - expect_stock_position) data.ix[i_date, 'Exp_Stock_Position'] = expect_stock_position if stock_diff > 0.04: diff = expect_stock_position - cur_stock_position print(" %s Postion Change %.2f%% Now %.2f%% " % (cur_date, diff * 100, expect_stock_position * 100)) data.ix[i_date, "Cur_Close_Stock_Position"] = expect_stock_position data.ix[ i_date, "Cur_Close_Fixed_Position"] = 1.0 - expect_stock_position data.ix[i_date, "Change"] = "Change" number += 1 data["Change_Position"] = data['Cur_Close_Stock_Position'].diff() res.ix[init_date, "stock_position"] = data["Cur_Close_Stock_Position"].mean() res.ix[init_date, "finish_nav"] = data['Portfolio_Nav'].values[-1] res.ix[init_date, "target_nav"] = target_nav res.ix[init_date, "finish_if"] = res.ix[init_date, "finish_nav"] >= res.ix[init_date, "target_nav"] res.ix[init_date, "number"] = number data_drop = data.dropna(subset=['Exp_Stock_Position']) if len(data_drop) > 0: res.ix[init_date, 'index_cum_pct'] = data_drop['Cum_Index_Pct'].values[-1] # 写入 #################################################################################################### num_format_pd = pd.DataFrame([], columns=data.columns, index=['format']) num_format_pd.ix['format', :] = '0.00%' num_format_pd.ix['format', [ 'PB_LF', 'Date_Number', 'Remain_Days', 'Init_Expected_PB', 'Cur_Expected_PB', 'Portfolio_Nav', 'Cur_Target_Nav' ]] = '0.00' save_path = 'C:\\Users\\doufucheng\\OneDrive\\Desktop\\data\\' begin_row_number = 0 begin_col_number = 1 color = "red" file_name = save_path + index_code + '_' + init_date + '_' + end_date + ".xlsx" sheet_name = "指数测试" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, data, num_format_pd, color) print(res) return res
def concat_exposure(halfyear_date, out_path): # 参数 ########################################################################################## # end_date = "20180715" # out_path = 'E:\\3_数据\\4_fund_data\\6_index_enhanced_fund_snowball\\' # 沪深300 指数、基金暴露拼接起来 ########################################################################################## index_name = '沪深300' index_code = "000300.SH" index_exposure = Index().get_index_exposure_date(index_code, halfyear_date) file = os.path.join(out_path, 'filter_fund_pool\\', '基金最终筛选池_' + index_name + '.xlsx') fund_code = pd.read_excel(file, index_col=[1], encoding='gbk') fund_code_list = list(fund_code.index) for i_fund in range(0, len(fund_code_list)): fund = fund_code_list[i_fund] exposure_add = Fund().get_fund_holder_exposure_date( fund, halfyear_date) if i_fund == 0: exposure_fund = exposure_add else: exposure_fund = pd.concat([exposure_fund, exposure_add], axis=0) exposure = pd.concat([exposure_fund, index_exposure], axis=0) num_format_pd = pd.DataFrame([], columns=exposure.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = os.path.join( out_path, "exposure", index_name, "BARRA风格暴露_" + index_name + "_" + Date().get_normal_date_month_end_day(halfyear_date) + ".xlsx") sheet_name = "BARRA风格暴露" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, exposure, num_format_pd, color) ########################################################################################## index_name = '中证500' index_code = "000905.SH" index_exposure = Index().get_index_exposure_date(index_code, halfyear_date) file = os.path.join(out_path, 'filter_fund_pool\\', '基金最终筛选池_' + index_name + '.xlsx') fund_code = pd.read_excel(file, index_col=[1], encoding='gbk') fund_code_list = list(fund_code.index) for i_fund in range(0, len(fund_code_list)): fund = fund_code_list[i_fund] exposure_add = Fund().get_fund_holder_exposure_date( fund, halfyear_date) if i_fund == 0: exposure_fund = exposure_add else: exposure_fund = pd.concat([exposure_fund, exposure_add], axis=0) exposure = pd.concat([exposure_fund, index_exposure], axis=0) num_format_pd = pd.DataFrame([], columns=exposure.columns, index=['format']) num_format_pd.ix['format', :] = '0.00' begin_row_number = 0 begin_col_number = 0 color = "red" file_name = os.path.join( out_path, "exposure", index_name, "BARRA风格暴露_" + index_name + "_" + Date().get_normal_date_month_end_day(halfyear_date) + ".xlsx") sheet_name = "BARRA风格暴露" write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, exposure, num_format_pd, color)