def cal_fund_regression_exposure_index_all(self, beg_date, end_date, period="D",
                                           fund_pool="指数+主动股票+灵活配置60基金",
                                           file_rewrite=False):
    """
    Run the index-exposure regression for every fund of a filtered pool.

    The pool is loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)`` and restricted to rows
    flagged as non-ETF, A-class, non-feeder and non-HK funds. A fund is
    regressed only when its output CSV does not exist yet, unless
    ``file_rewrite`` is true.

    :param beg_date: start date, forwarded to the per-fund regression
    :param end_date: end date; also selects the pool snapshot
    :param period: sampling period, forwarded to the per-fund regression
    :param fund_pool: name of the fund pool to load
    :param file_rewrite: recompute even if the output file already exists
    """
    quarter_date = Date().get_last_fund_quarter_date(end_date)
    pool = FundPool().get_fund_pool_all(quarter_date, fund_pool)

    keep = ((pool['if_etf'] == "非ETF基金")
            & (pool['if_a'] == "A类基金")
            & (pool['if_connect'] == "非联接基金")
            & (pool['if_hk'] == "非港股基金"))
    pool = pool[keep].reset_index(drop=True)
    pool.index = pool['wind_code']
    print(len(pool))

    # Pair codes and names explicitly; an integer key on the code-labelled
    # Series relied on pandas' deprecated positional fallback.
    for fund_code, fund_name in zip(pool.index, pool['sec_name'].values):
        file = '%s_%s_%s.csv' % (self.file_prefix, self.folder_name, fund_code)
        out_file = os.path.join(self.index_exposure_path, file)

        if file_rewrite or not os.path.exists(out_file):
            print(fund_name, fund_code)
            self.cal_fund_regression_exposure_index(
                fund_code, beg_date, end_date, period)
def __init__(self):
    """Initialise each parent component explicitly, in declaration order."""
    for parent in (FundHolder, FundFactor, FundPool, FundStatic, FundExposure):
        parent.__init__(self)
def rank_excess_fund(self, fund_pool_name, ge_index_code, my_index_code,
                     my_fund_code, beg_date, end_date):
    """
    Rank one fund's excess return against the funds of a pool.

    ``my_fund_code`` is measured against ``my_index_code``; every other fund
    of the pool (snapshot "20181231", restricted to ``setupdate < beg_date``)
    is measured against ``ge_index_code``. Funds whose computation raises are
    reported with ``print`` and dropped from the ranking. The ranking table
    is written to ``self.data_path`` as a CSV.

    :return: ``(excess_return, rank_percentile, "rank/total")`` of
        ``my_fund_code``
    :raises KeyError: if ``my_fund_code`` itself could not be evaluated and
        therefore is missing from the ranking table
    """
    pool = FundPool().get_fund_pool_all(date="20181231", name=fund_pool_name)
    pool = pool[pool['setupdate'] < beg_date]
    fund_codes = list(pool['wind_code'].values)
    fund_codes.append(my_fund_code)

    result = pd.DataFrame([], index=fund_codes)
    nav = FundFactor().get_fund_factor("Repair_Nav")

    # Only two benchmark codes ever occur, so fetch each series once instead
    # of once per fund. A failed fetch is not cached and is retried.
    index_cache = {}

    for fund_code in fund_codes:
        index_code = my_index_code if fund_code == my_fund_code else ge_index_code

        try:
            print(fund_code, index_code, beg_date, end_date)
            if index_code not in index_cache:
                index_cache[index_code] = pd.DataFrame(
                    Index().get_index_factor(index_code, attr=["CLOSE"]))
            fund = pd.DataFrame(nav[fund_code])
            # Hand over a copy so the cached frame cannot be altered.
            fs = FinancialSeries(fund, index_cache[index_code].copy())
            fund_return = fs.get_interval_return(beg_date, end_date)
            bench_return = fs.get_interval_return_benchmark(beg_date, end_date)
            result.loc[fund_code, "基准收益"] = bench_return
            result.loc[fund_code, "基金收益"] = fund_return
            result.loc[fund_code, "超额收益"] = -bench_return + fund_return
        except Exception as e:
            # Best effort: a fund without usable data is skipped.
            print(e)

    result = result.dropna()
    # my_fund_code may already be a pool member; keep a single row.
    result = result[~result.index.duplicated()]
    result = result.sort_values(by=['超额收益'], ascending=False)

    total = len(result)
    result['收益名次'] = range(1, total + 1)
    result['收益排名'] = result['收益名次'].map(lambda x: str(x) + '/' + str(total))
    result['收益排名百分比'] = result['收益名次'].map(lambda x: x / total)

    excess_return = result.loc[my_fund_code, "超额收益"]
    pct = result.loc[my_fund_code, "收益排名百分比"]
    rank_str = result.loc[my_fund_code, "收益排名"]

    out_name = "超额收益_%s_%s_%s.csv" % (my_fund_code, beg_date, end_date)
    result.to_csv(os.path.join(self.data_path, out_name))
    return excess_return, pct, rank_str
def cal_weight_date(self, quarter_date):
    """
    Sum the asset-scaled quarterly holdings of every fund in the pool
    "普通股票型基金" for one quarter date.

    Each fund's holding column is multiplied by
    ``self.total_asset.loc[fund, quarter_date] / 1e8``; when that lookup
    raises, the factor 0.5 is used instead. A fund whose holding cannot be
    loaded contributes an empty column.

    :param quarter_date: label of the quarter column to read
    :return: single-column DataFrame (column ``0``), indexed by the labels of
        the holding tables, holding the row-wise sum; empty when the pool is
        empty
    """
    fund_pool = FundPool().get_fund_pool_code(name="普通股票型基金", date=quarter_date)

    holdings = []
    for fund in fund_pool:
        try:
            asset = self.total_asset.loc[fund, quarter_date] / 100000000
        except Exception:
            # Fallback factor when the asset lookup fails.
            asset = 0.5

        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            frame = pd.DataFrame(fund_holding[quarter_date]).dropna()
            frame *= asset
            frame.columns = [fund]
        except Exception:
            # Best effort: a fund without holdings adds an empty column.
            frame = pd.DataFrame([], columns=[fund])
        holdings.append(frame)

    # Concatenate once; growing the frame inside the loop is quadratic and
    # left the variable unbound for an empty pool.
    stock_data = pd.concat(holdings, axis=1) if holdings else pd.DataFrame()
    stock_data = stock_data.dropna(how='all')
    return pd.DataFrame(stock_data.sum(axis=1))
def cal_all_wind_file(self):
    """
    Write one Wind portfolio CSV for every "Q" date since 20040101.

    Per date: the median of ``self.stock_ratio`` over the pool
    "普通股票型基金" (divided by 100) is used as the equity share; the weights
    from ``self.cal_weight_date`` are normalised to sum to one, scaled by
    that share, and a ``Cash`` row receives the remainder. Files are written
    to ``<self.wind_port_path>/<self.port_name>/``.
    """
    quarter_dates = Date().get_normal_date_series("20040101", datetime.today(), "Q")

    for quarter_date in quarter_dates:
        pool_codes = FundPool().get_fund_pool_code(name="普通股票型基金", date=quarter_date)
        pool_ratio = pd.DataFrame(self.stock_ratio.loc[pool_codes, quarter_date])
        ratio = pool_ratio.median().values[0] / 100.0

        weights = self.cal_weight_date(quarter_date)
        weights.columns = ["Weight"]
        weights /= weights.sum()

        # Offset of zero trading days relative to the quarter date.
        publish_date = Date().get_trade_date_offset(quarter_date, 0)
        print(len(weights))

        weights.index.name = "Code"
        weights *= ratio
        weights.loc['Cash', 'Weight'] = 1.0 - ratio

        # Constant columns of the upload format, in file order.
        constant_columns = (
            ("CreditTrading", "No"),
            ("Date", publish_date),
            ("Price", 0.0),
            ("Direction", "Long"),
        )
        for column, value in constant_columns:
            weights[column] = value

        target_dir = os.path.join(self.wind_port_path, self.port_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        file_name = '%s_%s.csv' % (self.port_name, publish_date)
        weights.to_csv(os.path.join(target_dir, file_name))
def get_data_from_out_position(self):
    """
    Build Wind portfolio files from two externally supplied holding CSVs and
    run the back-test on the resulting portfolio.

    The two GBK-encoded input files are read from fixed local paths. For
    every distinct ``report_period`` the ``weight`` column is summed per
    ``stock_code``, normalised, scaled by the median of ``self.stock_ratio``
    over the pool "基金持仓基准基金池" (divided by 100), and completed with a
    ``Cash`` row. Each file is dated 17 trading days after the report period
    (``Date().get_trade_date_offset(date, 17)``).
    """
    # NOTE(review): machine-specific input paths; consider parameterising.
    file1 = r'C:\Users\doufucheng\OneDrive\Desktop\普通股票型基金.csv'
    file2 = r'C:\Users\doufucheng\OneDrive\Desktop\偏股.csv'
    port_name = '天风股票基金仓位'
    fund_index_code = "885000.WI"

    frames = [pd.read_csv(path, encoding='gbk') for path in (file1, file2)]
    data = pd.concat(frames, axis=0).reset_index(drop=True)

    date_series = sorted(set(data.report_period.values))

    sub_path = os.path.join(WindPortUpLoad().path, port_name)
    os.makedirs(sub_path, exist_ok=True)

    for date in date_series:
        quarter_date = str(date)
        fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)
        stock_ratio = pd.DataFrame(self.stock_ratio.loc[fund_pool, quarter_date])
        ratio = stock_ratio.median().values[0] / 100.0

        # Select the column before aggregating so that unrelated (possibly
        # non-numeric) columns are never summed.
        period_rows = data[data.report_period == date]
        weight_sum = period_rows.groupby(by=['stock_code'])['weight'].sum()

        data_date = pd.DataFrame(weight_sum)
        data_date.columns = ['Weight']
        data_date['Weight'] = data_date['Weight'] / data_date['Weight'].sum()

        publish_date = Date().get_trade_date_offset(date, 17)

        data_date['Weight'] *= ratio
        data_date.loc["Cash", "Weight"] = 1 - ratio
        data_date.index.name = 'Code'
        data_date["CreditTrading"] = "No"
        data_date["Date"] = publish_date
        data_date["Price"] = 0.0
        data_date["Direction"] = "Long"

        file = os.path.join(sub_path, '%s_%s.csv' % (port_name, publish_date))
        data_date.to_csv(file)

    backtest = BackTest()
    backtest.set_info(port_name, fund_index_code)
    backtest.read_weight_at_all_change_date()
    backtest.cal_weight_at_all_daily()
    backtest.cal_port_return(beg_date="20040101")
    backtest.cal_turnover(annual_number=4)
    backtest.cal_summary(all_beg_date="20040101")
def cal_style_position_all_fund(self, beg_date, end_date):
    """
    Run ``self.cal_style_position`` for every fund of the pool
    "基金持仓基准基金池" (snapshot "20180630").

    :param beg_date: start date, forwarded unchanged
    :param end_date: end date, forwarded unchanged
    """
    pool_codes = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date="20180630")

    for fund in pool_codes:
        self.cal_style_position(beg_date, end_date, fund)
def cal_fund_regression_risk_alpha_return_style_all(
        self, beg_date, end_date, fund_pool="基金持仓基准基金池", start_index=200):
    """
    Run ``self.cal_fund_regression_risk_alpha_return_style`` for the funds of
    a pool, starting at position ``start_index``.

    The pool is loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)``.

    :param beg_date: start date, forwarded to the per-fund computation
    :param end_date: end date; also selects the pool snapshot
    :param fund_pool: name of the fund pool to load
    :param start_index: non-negative position of the first fund to process.
        NOTE(review): the default of 200 preserves the previously hard-coded
        offset, which skips the first 200 funds; it looks like a leftover
        from resuming an interrupted run. Pass 0 to process the whole pool.
    """
    quarter_date = Date().get_last_fund_quarter_date(end_date)
    fund_pool = FundPool().get_fund_pool_code(quarter_date, fund_pool)

    for i_fund in range(start_index, len(fund_pool)):
        fund_code = fund_pool[i_fund]
        self.cal_fund_regression_risk_alpha_return_style(
            fund_code, beg_date, end_date)
def cal_factor_mrar_all(self, T, r, beg_date, end_date):
    """
    Compute ``self.cal_factor_mrar`` for every fund at every "Q" date between
    ``beg_date`` and ``end_date`` and write the table to
    ``MorningStar_MRAR_<r>_<T>.csv`` under ``self.path``.

    The macro series "S0059744" is averaged per calendar month, re-labelled
    with the month-end date and divided by 12 before being handed to the
    per-fund computation. The funds evaluated at a date are the union of the
    pools "基金持仓基准基金池" and "量化基金" at that date and "东方红基金" at
    "20180630". A fund whose computation raises is recorded as NaN.

    :param T: forwarded to ``cal_factor_mrar``
    :param r: forwarded to ``cal_factor_mrar``
    :param beg_date: first date of the quarterly series
    :param end_date: last date of the quarterly series
    """
    date_series = Date().get_normal_date_series(beg_date, end_date, "Q")
    result = pd.DataFrame([], index=date_series)

    def month_end(year_month):
        """Map a string starting with ``YYYYMM`` to that month's last day."""
        year = int(year_month[0:4])
        month = int(year_month[4:6])
        last_day = calendar.monthrange(year, month)[1]
        return datetime(year, month, last_day).strftime("%Y%m%d")

    macro_code = "S0059744"
    macro_name = "中债国债到期收益率-1年"

    macro_data = Macro().get_macro_data(macro_code, None, None)
    macro_data.columns = [macro_name]
    macro_data['YearMonth'] = macro_data.index.map(lambda x: x[0:6])
    macro_data = macro_data.groupby(by=['YearMonth']).mean()[macro_name]
    macro_data.index = macro_data.index.map(month_end)
    macro_data = pd.DataFrame(macro_data)
    macro_data.columns = [macro_name]
    # presumably converts an annual rate to a monthly one - TODO confirm
    macro_data /= 12.0

    fund_data = Fund().get_fund_factor("Repair_Nav_Pct", None, None)

    for report_date in date_series:
        pool_codes = FundPool().get_fund_pool_code(date=report_date, name="基金持仓基准基金池")
        quant_codes = FundPool().get_fund_pool_code(date=report_date, name="量化基金")
        dfh_codes = FundPool().get_fund_pool_code(date="20180630", name="东方红基金")
        fund_code_list = sorted(set(pool_codes) | set(dfh_codes) | set(quant_codes))

        for fund in fund_code_list:
            print(report_date, fund)
            try:
                # Evaluate at the row's own report date. Passing the overall
                # end_date made every row of the table identical.
                res = self.cal_factor_mrar(fund, T, r, report_date, fund_data, macro_data)
                result.loc[report_date, fund] = res
            except Exception:
                # Best effort: funds that cannot be evaluated stay NaN.
                result.loc[report_date, fund] = np.nan

    result = result.T
    file = os.path.join(self.path, "MorningStar_MRAR_" + str(r) + "_" + str(T) + '.csv')
    result.to_csv(file)
def cal_fund_holder_exposure_all(self, beg_date="19991231", end_date=None,
                                 fund_pool="基金持仓基准基金池"):
    """
    Run ``self.cal_fund_holder_exposure`` for every fund of a pool.

    The pool is loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)``.

    :param beg_date: start date, forwarded to the per-fund computation
    :param end_date: end date; ``None`` means "now" (``datetime.today()``),
        resolved at call time rather than once when the function is defined
    :param fund_pool: name of the fund pool to load
    """
    if end_date is None:
        end_date = datetime.today()

    quarter_date = Date().get_last_fund_quarter_date(end_date)
    fund_pool = FundPool().get_fund_pool_code(quarter_date, fund_pool)

    for fund_code in fund_pool:
        self.cal_fund_holder_exposure(fund_code, beg_date, end_date)
def cal_fund_regression_exposure_style_all(self, beg_date, end_date, period="M",
                                           fund_pool="基金持仓基准基金池"):
    """
    Run ``self.cal_fund_regression_exposure_style`` for every fund of a pool.

    The pool is loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)``.

    :param beg_date: start date, forwarded unchanged
    :param end_date: end date; also selects the pool snapshot
    :param period: sampling period, forwarded unchanged
    :param fund_pool: name of the fund pool to load
    """
    snapshot_date = Date().get_last_fund_quarter_date(end_date)
    pool_codes = FundPool().get_fund_pool_code(snapshot_date, fund_pool)

    for fund_code in pool_codes:
        self.cal_fund_regression_exposure_style(fund_code, beg_date, end_date, period)
def cal_fund_holder_exposure_quarter_all(
        self, beg_date="19991231", end_date=None, fund_pool="股票+灵活配置60型基金"):
    """
    Compute the quarterly holding exposure (top-ten holdings) of every fund
    of a pool via ``self.cal_fund_holder_exposure_quarter``.

    The pool is loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)``.

    :param beg_date: start date, forwarded to the per-fund computation
    :param end_date: end date as ``YYYYMMDD``; ``None`` means today, resolved
        at call time rather than once when the function is defined
    :param fund_pool: name of the fund pool to load
    """
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    quarter_date = Date().get_last_fund_quarter_date(end_date)
    fund_pool = FundPool().get_fund_pool_code(quarter_date, fund_pool)

    for fund_code in fund_pool:
        self.cal_fund_holder_exposure_quarter(fund_code, beg_date, end_date)
def get_etf_fund_data(self, beg_date, end_date):
    """
    Collect per-ETF share counts, unit NAV and the resulting market value
    and inflow between two dates.

    ``Exchange_Share`` is forward-filled by at most 3 rows and ``Unit_Nav``
    by at most 1 row before the rows labelled ``beg_date`` / ``end_date``
    are read. Funds lacking any of the joined values are dropped.

    :param beg_date: row label of the earlier share observation
    :param end_date: row label of the later share and NAV observation
    :return: DataFrame indexed by ``wind_code`` with the pool columns plus
        ``UnitNav``, ``Share``, ``ShareLast``, ``MvEnd`` and ``Inflow``; the
        last two are divided by 1e8
    :raises KeyError: if either date is not a row label of the factor tables
    """
    print("ETF Data %s %s" % (beg_date, end_date))

    # ffill replaces the deprecated fillna(method='pad').
    exchange_share = FundFactor().get_fund_factor("Exchange_Share").ffill(limit=3)
    unit_nav = FundFactor().get_fund_factor("Unit_Nav").ffill(limit=1)

    # Row lookups instead of transposing the whole table for every read.
    exchange_share_date = pd.DataFrame(exchange_share.loc[end_date])
    exchange_share_date.columns = ['Share']
    exchange_share_date_last = pd.DataFrame(exchange_share.loc[beg_date])
    exchange_share_date_last.columns = ['ShareLast']
    unit_nav_date = pd.DataFrame(unit_nav.loc[end_date])
    unit_nav_date.columns = ['UnitNav']

    fund_pool = FundPool().get_fund_pool_all(name="ETF基金", date="20181231")
    fund_pool = fund_pool[[
        'sec_name', 'wind_code', 'setupdate', 'bench_code', 'bench_name'
    ]]
    fund_pool.index = fund_pool.wind_code

    concat_data = pd.concat(
        [fund_pool, unit_nav_date, exchange_share_date, exchange_share_date_last],
        axis=1)
    concat_data = concat_data.dropna()

    concat_data['MvEnd'] = concat_data['Share'] * concat_data['UnitNav']
    concat_data['Inflow'] = (
        concat_data['Share'] - concat_data['ShareLast']) * concat_data['UnitNav']
    concat_data['MvEnd'] /= 100000000.0
    concat_data['Inflow'] /= 100000000.0

    return concat_data
def cal_all_wind_file(self):
    """
    Write one Wind portfolio CSV for every "S" date since 20040101.

    Per date: the median of ``self.stock_ratio`` over the pool
    "普通股票型基金" (divided by 100) is the equity share; the weights from
    ``self.cal_weight_date`` are normalised, scaled by that share, and a
    ``Cash`` row receives the remainder. Files are written to
    ``<self.wind_port_path>/<self.port_name>/``.
    """
    report_dates = Date().get_normal_date_series("20040101", datetime.today(), "S")

    for quarter_date in report_dates:
        codes = FundPool().get_fund_pool_code(name="普通股票型基金", date=quarter_date)
        ratio_frame = pd.DataFrame(self.stock_ratio.loc[codes, quarter_date])
        ratio = ratio_frame.median().values[0] / 100.0

        port = self.cal_weight_date(quarter_date)
        port.columns = ["Weight"]
        port /= port.sum()

        publish_date = Date().get_trade_date_offset(quarter_date, 0)
        # A variant that drifted the weights by cumulative stock returns
        # between the quarter date and the publish date is disabled.

        print(len(port))
        port.index.name = "Code"
        port *= ratio
        port.loc['Cash', 'Weight'] = 1.0 - ratio

        # Constant columns of the upload format; dict order is file order.
        extras = {
            "CreditTrading": "No",
            "Date": publish_date,
            "Price": 0.0,
            "Direction": "Long",
        }
        for name, value in extras.items():
            port[name] = value

        out_dir = os.path.join(self.wind_port_path, self.port_name)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        out_file = os.path.join(out_dir, '%s_%s.csv' % (self.port_name, publish_date))
        port.to_csv(out_file)
def cal_fund_holder_exposure_halfyear_all(self, beg_date="19991231", end_date=None,
                                          fund_pool="股票+灵活配置60型基金"):
    """
    Compute the half-year holding exposure of every fund of a pool via
    ``self.cal_fund_holder_exposure_halfyear`` (per the original note, the
    exposure is that of the not-fully-invested portfolio).

    Both dates are normalised with ``Date().change_to_str``; the pool is
    loaded for the date returned by
    ``Date().get_last_fund_quarter_date(end_date)``.

    :param beg_date: start date
    :param end_date: end date; ``None`` means "now" (``datetime.today()``),
        resolved at call time rather than once when the function is defined
    :param fund_pool: name of the fund pool to load
    """
    if end_date is None:
        end_date = datetime.today()

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    quarter_date = Date().get_last_fund_quarter_date(end_date)
    fund_pool = FundPool().get_fund_pool_code(quarter_date, fund_pool)

    for fund_code in fund_pool:
        self.cal_fund_holder_exposure_halfyear(fund_code, beg_date, end_date)
def load_wind_fund_asset(self, date=None):
    """
    Query the Wind field ``prt_fundnetasset_total`` for every fund of the
    pool "基金持仓基准基金池" and store the result as ``基金规模_<date>.csv``.

    :param date: report date; when ``None`` the second-to-last entry of
        ``Date().get_normal_date_series(period='Q')`` is used
    """
    if date is None:
        date = Date().get_normal_date_series(period='Q')[-2]
    print(date)

    # One string form for both the query and the file name; concatenating
    # the raw value failed for non-string dates after the Wind call.
    date_str = str(date)

    code = FundPool().get_fund_pool_code(date, "基金持仓基准基金池")
    code_str = ','.join(list(code))

    data = w.wss(code_str, "prt_fundnetasset_total", "unit=1;rptDate=" + date_str)
    # NOTE(review): the Wind response is used without checking for an error.
    data = pd.DataFrame(data.Data, columns=data.Codes, index=["FundAsset"]).T

    out_path = Parameter().get_load_out_file("Fund_Asset")
    out_file = os.path.join(out_path, "基金规模_" + date_str + '.csv')
    data.to_csv(out_file)
def cal_fund_holder_return_quarter_backtest_all(self, T, beg_date, end_date,
                                                col="AlphaReturn", type="Mean"):
    """
    Evaluate ``self.cal_fund_holder_return_quarter_backtest`` for every fund
    at every "Q" date between ``beg_date`` and ``end_date`` and write the
    table to ``FundHolderQuarter_<col><type>_<T>.csv`` under ``self.path``.

    The funds evaluated at a date are the union of the pools
    "基金持仓基准基金池" and "量化基金" at that date and "东方红基金" at
    "20180630". A fund whose computation raises is recorded as NaN.
    """
    report_dates = Date().get_normal_date_series(beg_date, end_date, "Q")
    result = pd.DataFrame([], index=report_dates)

    for report_date in report_dates:
        base_codes = FundPool().get_fund_pool_code(date=report_date, name="基金持仓基准基金池")
        quant_codes = FundPool().get_fund_pool_code(date=report_date, name="量化基金")
        dfh_codes = FundPool().get_fund_pool_code(date="20180630", name="东方红基金")

        # De-duplicated, sorted union of the three pools.
        merged = list(base_codes)
        merged += list(dfh_codes)
        merged += list(quant_codes)
        codes = sorted(set(merged))

        for fund in codes:
            print(report_date, fund)
            try:
                value = self.cal_fund_holder_return_quarter_backtest(
                    fund, T, report_date, col, type)
            except Exception as e:
                value = np.nan
            result.loc[report_date, fund] = value

    out_name = "FundHolderQuarter_" + col + type + "_" + str(T) + '.csv'
    result.T.to_csv(os.path.join(self.path, out_name))
def cal_quarter_holding_allfund_quarter(self, quarter_date):
    """
    Write the normalised aggregate holding of the lower-turnover half of the
    pool "基金持仓基准基金池" for one quarter date.

    Turnover is read from ``Fund().get_fund_turnover()`` at the half-year
    date obtained from ``quarter_date`` shifted by 15 trading days. Values
    below 15 are discarded, the remaining funds are sorted ascending and the
    first half is kept. Their holdings are summed per stock, normalised to
    sum to one and written to
    ``<self.data_weight_path>/<self.name>/<quarter_date>_QuarterHolding.csv``.

    :param quarter_date: label of the quarter column to read (a string, as
        it is concatenated into the file name)
    """
    fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)
    halfyear_date = Date().get_last_fund_halfyear_date(
        Date().get_trade_date_offset(quarter_date, 15))

    fund_turnover = Fund().get_fund_turnover()
    fund_turnover = fund_turnover.loc[fund_pool, :]
    # mask() returns a new frame instead of assigning into a .loc slice.
    fund_turnover = fund_turnover.mask(fund_turnover < 15)

    fund_turnover_date = pd.DataFrame(fund_turnover[halfyear_date]).dropna()
    fund_turnover_date = fund_turnover_date.sort_values(by=[halfyear_date], ascending=True)
    fund_pool = list(fund_turnover_date.index[0:len(fund_turnover_date) // 2])

    holdings = []
    for fund in fund_pool:
        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            frame = pd.DataFrame(fund_holding[quarter_date]).dropna()
            frame *= 1.0
            frame.columns = [fund]
        except Exception:
            # Best effort: a fund without holdings adds an empty column.
            frame = pd.DataFrame([], columns=[fund])
        holdings.append(frame)

    # Concatenate once; growing the frame inside the loop is quadratic and
    # left the variable unbound when no fund was selected.
    stock_data = pd.concat(holdings, axis=1) if holdings else pd.DataFrame()
    stock_data = stock_data.dropna(how='all')

    stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
    stock_data_weight.columns = ["WEIGHT"]
    stock_data_weight /= stock_data_weight.sum()
    stock_data_weight.index.name = "CODE"

    sub_path = os.path.join(self.data_weight_path, self.name)
    os.makedirs(sub_path, exist_ok=True)
    file = os.path.join(sub_path, quarter_date + '_QuarterHolding.csv')
    stock_data_weight.to_csv(file)
def cal_all_wind_file(self):
    """
    Write one Wind portfolio CSV for every "W" trade date since 20040601.

    Stock weights come from ``self.cal_weight_date`` at the last fund
    quarter date and are normalised to sum to one. The equity share blends
    the median reported stock ratio of the pool "基金持仓基准基金池" with
    ``self.regression_ratio(date)``: the regression part is weighted by
    ``diff / 80``, where ``diff`` is the trade-date distance from the
    quarter date. When the regression value is NaN, the reported ratio is
    used alone. A ``Cash`` row receives the remainder.
    """
    trade_dates = Date().get_trade_date_series("20040601", datetime.today(), "W")

    for date in trade_dates:
        quarter_date = Date().get_last_fund_quarter_date(date)
        pool_codes = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)
        reported = pd.DataFrame(self.stock_ratio.loc[pool_codes, quarter_date])
        holding_ratio = reported.median().values[0] / 100.0

        weights = self.cal_weight_date(quarter_date)
        weights.columns = ["Weight"]
        weights["Weight"] = weights["Weight"] / weights["Weight"].sum()

        regress_ratio = self.regression_ratio(date)
        diff = Date().get_trade_date_diff(quarter_date, date)

        if not np.isnan(regress_ratio):
            length = 80
            ratio = regress_ratio * (diff / length) + holding_ratio * (length - diff) / length
        else:
            ratio = holding_ratio

        print(date, quarter_date, diff, regress_ratio, holding_ratio, ratio)

        weights *= ratio
        weights.index.name = "Code"
        weights.loc['Cash', 'Weight'] = 1.0 - weights['Weight'].sum()

        # Constant columns of the upload format, in file order.
        for column, value in (("CreditTrading", "No"), ("Date", date),
                              ("Price", 0.0), ("Direction", "Long")):
            weights[column] = value

        target_dir = os.path.join(self.wind_port_path, self.port_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        target = os.path.join(target_dir, '%s_%s.csv' % (self.port_name, date))
        weights.to_csv(target)
def cal_fund_holder_risk_alpha_return_quarter_all(
        self, beg_date="20040101", end_date=None, fund_pool="股票+灵活配置60型基金"):
    """
    Run the quarterly holding-based return decomposition
    (``self.cal_fund_holder_risk_alpha_return_quarter``) for every fund at
    every "Q" date between ``beg_date`` and ``end_date``.

    For each quarterly date the computation date is that date shifted by 15
    trading days; the pool is loaded for the last fund quarter date relative
    to the shifted date.

    :param beg_date: first date of the quarterly series
    :param end_date: last date as ``YYYYMMDD``; ``None`` means today,
        resolved at call time rather than once when the function is defined
    :param fund_pool: name of the fund pool to load
    """
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    date_series = Date().get_normal_date_series(beg_date, end_date, "Q")

    for report_date in date_series:
        # get_trade_date_series(report_date, 15) passed an offset where an
        # end date belongs. Elsewhere in this file the 15-day shift is
        # written with get_trade_date_offset.
        calc_date = Date().get_trade_date_offset(report_date, 15)
        quarter_date = Date().get_last_fund_quarter_date(calc_date)
        fund_pool_list = FundPool().get_fund_pool_code(quarter_date, fund_pool)

        for fund_code in fund_pool_list:
            self.cal_fund_holder_risk_alpha_return_quarter(fund_code, calc_date)
def cal_fund_regression_return_index_backtest_all(self, T, beg_date, end_date,
                                                  col="AlphaReturn", type="Mean"):
    """
    Evaluate ``self.cal_fund_regression_return_index_backtest`` for every
    fund over the "M" trade dates between ``beg_date`` and ``end_date`` and
    write the combined table to
    ``FundRegressionIndex_<col><type>_<T>.csv`` under ``self.path``.

    The funds are the union of the pools "基金持仓基准基金池" and
    "东方红基金", both at "20180630". The "量化基金" and "指数型基金" pools were
    previously considered and are currently disabled.
    """
    date_series = Date().get_trade_date_series(beg_date, end_date, "M")

    base_codes = FundPool().get_fund_pool_code(date="20180630", name="基金持仓基准基金池")
    dfh_codes = FundPool().get_fund_pool_code(date="20180630", name="东方红基金")
    fund_code_list = sorted(set(base_codes) | set(dfh_codes))

    frames = []
    for fund in fund_code_list:
        print(fund)
        frames.append(self.cal_fund_regression_return_index_backtest(
            fund, T, date_series, col, type))

    # Concatenate once instead of re-copying the growing table per fund.
    # With no funds, fall back to an empty frame over the date series.
    if frames:
        result = pd.concat(frames, axis=1)
    else:
        result = pd.DataFrame([], index=date_series)

    result = result.T
    file = os.path.join(
        self.path, "FundRegressionIndex_" + col + type + "_" + str(T) + '.csv')
    result.to_csv(file)
def cal_all_wind_file(self):
    """
    Write one Wind portfolio CSV for every date in ``self.date_series``.

    Per date: the median of ``self.stock_ratio`` over the pool
    "基金持仓基准基金池" at the last fund quarter date (divided by 100) is the
    equity share; the weights from ``self.cal_weight_date(date,
    quarter_date)`` are normalised, scaled by that share, and a ``Cash`` row
    receives the remainder. Files are written to
    ``<self.wind_port_path>/<self.port_name>/<port_name>_<date>.csv``.
    """
    for date in self.date_series:
        quarter_date = Date().get_last_fund_quarter_date(date)
        fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)
        stock_ratio = pd.DataFrame(self.stock_ratio.loc[fund_pool, quarter_date])
        ratio = stock_ratio.median().values[0] / 100.0

        stock_data_weight = self.cal_weight_date(date, quarter_date)
        stock_data_weight /= stock_data_weight.sum()
        stock_data_weight.columns = ["Weight"]
        print(len(stock_data_weight))

        stock_data_weight.index.name = "Code"
        stock_data_weight *= ratio
        stock_data_weight.loc['Cash', 'Weight'] = 1.0 - ratio
        stock_data_weight["CreditTrading"] = "No"
        stock_data_weight["Date"] = date
        stock_data_weight["Price"] = 0.0
        stock_data_weight["Direction"] = "Long"

        sub_path = os.path.join(self.wind_port_path, self.port_name)
        os.makedirs(sub_path, exist_ok=True)

        # The file name previously used an undefined ``publish_date`` and
        # raised NameError; name the file after the "Date" column value.
        file = os.path.join(sub_path, '%s_%s.csv' % (self.port_name, date))
        stock_data_weight.to_csv(file)
def rank_fund2(self, fund_pct, bench_pct, fund_code, rank_pool, beg_date, end_date,
               new_fund_date=None, excess=False):
    """
    Rank one fund's interval return within a pool, using local return data.

    Three modes:
      1. ``rank_pool == 'wind'``: the rank is taken from the Wind peer-rank
         fields;
      2. ``excess`` false: funds of the named pool are ranked by compounded
         ``fund_pct``;
      3. ``excess`` true: funds are ranked by compounded
         ``fund_pct - bench_pct``.

    In modes 2 and 3 the pool (snapshot "20181231") is restricted to
    non-feeder, non-HK, A-class, non-ETF funds with
    ``setupdate <= new_fund_date``. On success the ranking table is written
    to ``self.data_path``.

    :param fund_pct: table of percentage returns, rows sliced by date and
        columns selected by fund code
    :param bench_pct: benchmark returns aligned with ``fund_pct``; only used
        when ``excess`` is true
    :param fund_code: fund to look up in the ranking
    :param rank_pool: ``'wind'`` or the name of a fund pool
    :param beg_date: start of the interval
    :param end_date: end of the interval
    :param new_fund_date: latest allowed set-up date; defaults to
        ``beg_date``
    :param excess: rank by excess instead of absolute return
    :return: ``(rank_string, percentile)``; the string ``"None"`` replaces a
        value that could not be determined
    """
    if new_fund_date is None:
        new_fund_date = beg_date

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    new_fund_date = Date().change_to_str(new_fund_date)
    print(" 正在计算基金排名 %s 在基金池 %s 从 %s 到 %s " % (fund_code, rank_pool, beg_date, end_date))

    if rank_pool == 'wind':
        # Wind peer ranking, third-level classification (fundType=3).
        date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date, end_date)
        data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
        val = str(data.Data[0][0])
        data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)
        try:
            pct = np.round(data.Data[0][0] / 100.0, 3)
        except Exception as e:
            print(e)
            print("wind返回基金排名百分比非数字")
            pct = "None"
        return val, pct

    pool = FundPool().get_fund_pool_all(date="20181231", name=rank_pool)
    keep = (pool['if_connect'] == '非联接基金') & (pool['if_hk'] == '非港股基金')
    keep &= (pool['if_a'] == 'A类基金')
    keep &= (pool['if_etf'] == '非ETF基金')
    pool = pool[keep]

    # Both local modes share one pipeline; only the input series differs.
    daily_pct = fund_pct.sub(bench_pct) if excess else fund_pct

    # Select only pool members that have return data. Asking .loc for absent
    # labels raises KeyError on current pandas. Absent funds end up with a
    # NaN return after the join below and are dropped.
    available = daily_pct.columns.intersection(pool.index)
    daily_pct = daily_pct.loc[beg_date:end_date, available]
    daily_pct = daily_pct.dropna(how='all')
    if daily_pct.empty:
        # No observation in the interval: nothing to rank.
        return "None", "None"

    cumulative = (daily_pct / 100.0 + 1.0).cumprod() - 1.0
    data = pd.DataFrame(cumulative.iloc[-1, :])
    data.columns = ['Pct']
    data = data[~data.index.duplicated()]
    data = data.dropna()

    data = pd.concat([data, pool], axis=1)
    data = data[data["setupdate"] <= new_fund_date]
    data = data.dropna(subset=['Pct'])
    data = data.sort_values(by='Pct', ascending=False)

    total = len(data)
    data['range'] = range(total)
    data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(total))
    data['rank_pct'] = data['range'].map(lambda x: (x + 1) / total)

    try:
        val = data.loc[fund_code, "rank"]
        pct = data.loc[fund_code, "rank_pct"]
        pct = np.round(pct, 3)
        file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
        file = os.path.join(self.data_path, file)
        data.to_csv(file)
    except Exception as e:
        # Best effort: fund absent from the ranking, or the write failed.
        print(e)
        val = "None"
        pct = "None"

    return val, pct
def rank_fund(self, fund_code, rank_pool, beg_date, end_date,
              new_fund_date=None, excess=False):
    """
    Rank one fund's interval return within a pool, using Wind return data.

    Three modes:
      1. ``rank_pool == 'wind'``: the rank is taken from the Wind peer-rank
         fields;
      2. ``excess`` false: funds of the named pool are ranked by the Wind
         field ``NAV_adj_return``;
      3. ``excess`` true: funds are ranked by the Wind field
         ``NAV_over_bench_return_per``.

    In modes 2 and 3 the pool (snapshot "20181231") is restricted to
    non-feeder, non-HK, A-class, non-ETF funds with
    ``setupdate <= new_fund_date``. On success the ranking table is written
    to ``self.data_path``.

    :param fund_code: fund to look up in the ranking
    :param rank_pool: ``'wind'`` or the name of a fund pool
    :param beg_date: start of the interval
    :param end_date: end of the interval
    :param new_fund_date: latest allowed set-up date; defaults to
        ``beg_date``
    :param excess: rank by excess instead of absolute return
    :return: ``(rank_string, percentile)``; the string ``"None"`` replaces a
        value that could not be determined
    """
    if new_fund_date is None:
        new_fund_date = beg_date

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    new_fund_date = Date().change_to_str(new_fund_date)
    print(" 正在计算基金排名 %s 在基金池 %s 从 %s 到 %s " % (fund_code, rank_pool, beg_date, end_date))

    if rank_pool == 'wind':
        # Wind peer ranking, third-level classification (fundType=3).
        date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date, end_date)
        data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
        val = str(data.Data[0][0])
        data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)
        try:
            pct = np.round(data.Data[0][0] / 100.0, 3)
        except Exception as e:
            print(e)
            print("wind返回基金排名百分比非数字")
            pct = "None"
        return val, pct

    pool = FundPool().get_fund_pool_all(date="20181231", name=rank_pool)
    keep = (pool['if_connect'] == '非联接基金') & (pool['if_hk'] == '非港股基金')
    keep &= (pool['if_a'] == 'A类基金')
    keep &= (pool['if_etf'] == '非ETF基金')
    pool = pool[keep]
    fund_code_str = ','.join(pool.index.values)

    # Both pool modes share one pipeline; only the Wind field differs.
    field = "NAV_over_bench_return_per" if excess else "NAV_adj_return"
    date_str = "startDate=%s;endDate=%s" % (beg_date, end_date)
    data = w.wss(fund_code_str, field, date_str)
    data = pd.DataFrame(data.Data, columns=data.Codes, index=[field]).T
    # Duplicate codes break the join below. The excess branch used to skip
    # this step although the absolute branch had it.
    data = data[~data.index.duplicated()]

    data = pd.concat([data, pool], axis=1)
    data = data[data["setupdate"] <= new_fund_date]
    data = data.dropna(subset=[field])
    data = data.sort_values(by=field, ascending=False)

    total = len(data)
    data['range'] = range(total)
    data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(total))
    data['rank_pct'] = data['range'].map(lambda x: (x + 1) / total)

    try:
        val = data.loc[fund_code, "rank"]
        pct = data.loc[fund_code, "rank_pct"]
        pct = np.round(pct, 3)
        file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
        file = os.path.join(self.data_path, file)
        data.to_csv(file)
    except Exception as e:
        # Best effort: fund absent from the ranking, or the write failed.
        print(e)
        val = "None"
        pct = "None"

    return val, pct
def cal_weight_date(self, date, quarter_date):
    """
    Estimate the pool's stock weights at ``date`` from the holdings reported
    at ``quarter_date``.

    The holdings of every fund of the pool "基金持仓基准基金池" are summed
    per stock and normalised to sum to one. When ``date`` is more than 30
    trade dates after ``quarter_date``, a non-negative Lasso regression of
    ``self.index_return`` (divided by the pool's median stock ratio) on the
    stock returns from 61 trade dates before ``date`` up to ``date`` is
    fitted, and its coefficients above 0.0001 are joined to the reported
    weights.

    :param date: evaluation date
    :param quarter_date: label of the quarter column to read
    :return: DataFrame with column ``Weight``; in the regression case an
        additional column named ``date`` holds the selected coefficients
    """
    days_diff = Date().get_trade_date_diff(quarter_date, date)
    fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)

    holdings = []
    for fund in fund_pool:
        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            frame = pd.DataFrame(fund_holding[quarter_date]).dropna()
            frame *= 1.0
            frame.columns = [fund]
        except Exception:
            # Best effort: a fund without holdings adds an empty column.
            frame = pd.DataFrame([], columns=[fund])
        holdings.append(frame)

    # Concatenate once; growing the frame inside the loop is quadratic and
    # left the variable unbound for an empty pool.
    stock_data = pd.concat(holdings, axis=1) if holdings else pd.DataFrame()
    stock_data = stock_data.dropna(how='all')

    stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
    stock_data_weight /= stock_data_weight.sum()
    stock_data_weight.columns = ["Weight"]
    stock_data_weight = stock_data_weight.sort_values(by=['Weight'], ascending=False)

    stock_ratio = pd.DataFrame(self.stock_ratio.loc[fund_pool, quarter_date])
    ratio = stock_ratio.median().values[0] / 100.0

    if days_diff <= 30:
        return stock_data_weight

    # Stock and index returns over the regression window.
    stock_pool = list(stock_data_weight.index)
    beg_date = Date().get_trade_date_offset(date, -61)
    date_series = Date().get_trade_date_series(beg_date, date)

    f_pct = self.index_return / ratio
    s_pct = self.stock_return.loc[date_series, stock_pool]
    s_pct = s_pct.dropna(axis=1, how='all')
    s_pct = s_pct.dropna(how='all')
    f_pct = f_pct.dropna()

    # Regression input: the target must be present, missing stock returns
    # are treated as zero.
    data = pd.concat([f_pct, s_pct], axis=1)
    data = data.loc[beg_date:date, :]
    data = data.dropna(subset=['IndexReturn'])
    data = data.fillna(0.0)

    # Column vector, the shape the deprecated np.row_stack produced.
    y = data['IndexReturn'].values.reshape(-1, 1)
    # assumes 'IndexReturn' is the only (first) column of f_pct - TODO confirm
    x = data.iloc[:, 1:].values

    # LassoCV selects alpha; the final model reuses it without an intercept.
    model = LassoCV(fit_intercept=True, positive=True)
    model.fit(x, y)
    print(model.alpha_)
    alpha = model.alpha_

    model = Lasso(alpha=alpha, fit_intercept=False, positive=True)
    model.fit(x, y)

    selected = model.coef_ > 0.0001
    res = pd.DataFrame(model.coef_[selected],
                       index=s_pct.columns[selected],
                       columns=[date])
    res = res.sort_values(by=[date], ascending=False)

    result = pd.concat([res, stock_data_weight], axis=1)
    result = result.sort_values(by=['Weight'], ascending=False)
    return result