def StockBarraDecomposeReturnQuarter(
        report_date,
        out_path='E:\\3_Data\\4_fund_data\\7_fund_select_stock\\'):
    """
    Sum per-stock Barra return components over the days around a date.

    The window runs from 20 trading days before ``report_date`` to 20 trading
    days after it. For every day the residual returns and the risk-factor
    returns are placed side by side; the daily tables are then summed per
    stock, and the STYLE and INDUSTRY factor columns are totalled into two
    extra columns before the table is written to CSV.

    :param report_date: date the window is centred on; it is concatenated
        into the output file name, so it has to be a string.
    :param out_path: root folder of the output. The original body wrote to
        an undefined name ``file``. NOTE(review): the default folder and the
        file name layout are an assumption - confirm the intended location.
    :return: None; the result is written to disk.
    """
    import os  # local import: the original body never referenced ``os``

    T = 20
    beg_date = Date().get_trade_date_offset(report_date, -T)
    end_date = Date().get_trade_date_offset(report_date, T)
    date_series = Date().get_trade_date_series(beg_date, end_date)

    daily_returns = {}
    for date in date_series:
        residual = Barra().get_stock_residual_return_date(date)
        riskfactor = Barra().get_stock_riskfactor_return_date(date)
        daily_returns[date] = pd.concat([residual, riskfactor], axis=1)

    # Replacement for pd.Panel(result).sum(axis=0): stack the daily tables
    # under a (date, stock) index and add them up per stock.
    pct_sum = pd.concat(daily_returns, axis=0).groupby(level=1).sum()

    barra_name = Barra().get_factor_name(['STYLE'])
    barra_name = list(barra_name['NAME_EN'].values)
    pct_sum['STYLE'] = pct_sum.loc[:, barra_name].sum(axis=1)

    barra_name = Barra().get_factor_name(['INDUSTRY'])
    barra_name = list(barra_name['NAME_EN'].values)
    pct_sum['INDUSTRY'] = pct_sum.loc[:, barra_name].sum(axis=1)

    print(" StockBarraDecomposeReturnQuarter %s" % report_date)
    file = os.path.join(out_path, "StockBarraDecomposeReturnQuarter",
                        "StockBarraDecomposeReturnQuarter" + report_date + '.csv')
    pct_sum.to_csv(file)
def cal_mfc_holding_barra_exposure_date(fund_name, date):
    """
    Compute the Barra exposure of one fund's stock holdings on one day.

    Holdings are restricted to the rows of the given fund whose security
    category is stock, market values are normalised to weights, and each
    factor exposure is the weight-weighted sum of the stock exposures divided
    by the total weight of the rows that survive the join.

    :param fund_name: fund name as it appears in the holdings table.
    :param date: any date; it is first mapped with a trade-date offset of 0.
    :return: one-row DataFrame indexed by the trade date with one column per
        factor. On any failure the row is left empty (all NaN) and the
        exception is printed.
    """
    date = Date().get_trade_date_offset(date, 0)
    type_list = ["STYLE", "COUNTRY", "INDUSTRY"]

    try:
        holding_data = MfcGetData().get_fund_security(date)
        holding_data = holding_data[["基金名称", "证券代码", "市值", '证券类别']]
        holding_data = holding_data[holding_data["基金名称"] == fund_name]
        # copy so the column assignments below do not write into a slice
        holding_data = holding_data[holding_data['证券类别'] == "股票"].copy()
        holding_data.columns = ["FundName", "StockCode", "Weight", 'Type']

        exposure = Barra().get_factor_exposure_date(date, type_list=type_list)

        holding_data['Weight'] = holding_data['Weight'] / holding_data['Weight'].sum()
        holding_data.StockCode = holding_data.StockCode.map(
            CodeFormat().stock_code_add_postfix)
        holding_data.index = holding_data.StockCode

        data = pd.concat([holding_data, exposure], axis=1)
        data = data.sort_values(by=['Weight'], ascending=True)
        data = data.dropna(subset=["Weight"])

        res = pd.DataFrame([], columns=exposure.columns, index=[date])
        total_weight = data['Weight'].sum()

        if total_weight > 0.0:
            for risk_factor_name in exposure.columns:
                exposure_sum = (data["Weight"] * data[risk_factor_name]).sum()
                res.loc[date, risk_factor_name] = exposure_sum / total_weight
            # NOTE(review): the collapsed source does not show whether this
            # message was printed once or once per factor; once is assumed.
            print(" Calculate Mfcteda Fund %s Barra Exposure at %s" % (fund_name, date))
        else:
            print(" Calculate Mfcteda Fund %s At %s of Weight Stock is Zero" % (fund_name, date))
        return res
    except Exception as e:
        # Best effort: report the cause, then fall back to an empty row.
        print(e)
        print(" Calculate Mfcteda Fund %s Barra Exposure at %s is Null " % (fund_name, date))
        name = Barra().get_factor_name(type_list=type_list)
        res = pd.DataFrame([], columns=list(name.NAME_EN.values), index=[date])
        return res
def get_fund_risk_exposure(self, end_date):
    """
    Collect holding-based fund exposures (annual or semi-annual report).

    For every fund in the pool at ``end_date`` the STYLE and COUNTRY
    exposures of the latest half-year report date are copied into one table.
    Funds whose exposure cannot be read keep an empty row; the error is
    printed and the loop continues.

    :param end_date: date used to pick the fund pool and the report date.
    :return: DataFrame indexed by fund code with one column per factor.
    """
    type_list = ["STYLE", "COUNTRY"]
    fund_pool = self.get_fund_pool(end_date)
    last_halfyear_date = Date().get_last_fund_halfyear_date(end_date)
    print("Project Nice Stock Fund Getting Fund Style Exposure at %s ......" % last_halfyear_date)

    factor_columns = Barra().get_factor_name(type_list=type_list)['NAME_EN'].values
    fund_factor = pd.DataFrame([], index=fund_pool.index, columns=factor_columns)
    reader = FundExposure()

    for fund_code in fund_pool.index:
        try:
            one_fund = reader.get_fund_holder_exposure_halfyear_date(
                fund_code, last_halfyear_date, type_list)
            fund_factor.loc[fund_code, one_fund.columns] = one_fund.loc[fund_code, one_fund.columns]
            print("Exposure Style", fund_code, last_halfyear_date)
        except Exception as e:
            print(e)
            print("Exposure Style is None", fund_code, last_halfyear_date)

    return fund_factor
def update_data(self):
    """
    Refresh the data this object depends on.

    Recomputes the benchmark index's exposure and the stock covariance
    matrices for the window that starts 10 trading days before today.
    """
    today = datetime.today()
    window_start = Date().get_trade_date_offset(today, -10)

    Index().cal_index_exposure(self.benchmark_code, window_start, today)
    Barra().cal_stock_covariance_period(window_start, today)
def cal_mfc_holding_barra_exposure_period(self, fund_name, beg_date, end_date):
    """
    Compute one fund's daily Barra exposure over a period and persist it.

    The per-day rows are computed with
    ``cal_mfc_holding_barra_exposure_date``, merged into the fund's existing
    ``MfcRiskExposure_<fund_name>.csv`` when it exists, restricted to the
    STYLE / COUNTRY / INDUSTRY factor columns and written back.

    :param fund_name: fund name; also part of the output file name.
    :param beg_date: first date of the period.
    :param end_date: last date of the period.
    :return: None; the result is written to ``self.exposure_data_path``.
    """
    date_series_daily = Date().get_trade_date_series(beg_date, end_date)

    # Collect the rows first and concatenate once; concatenating inside the
    # loop re-copies the accumulated frame on every iteration.
    daily_rows = [
        self.cal_mfc_holding_barra_exposure_date(fund_name, date)
        for date in date_series_daily
    ]
    new_data = pd.concat(daily_rows, axis=0) if daily_rows else pd.DataFrame()

    out_file = os.path.join(self.exposure_data_path,
                            "MfcRiskExposure_" + fund_name + '.csv')

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        data = FactorOperate().pandas_add_row(data, new_data)
    else:
        data = new_data

    type_list = ["STYLE", "COUNTRY", "INDUSTRY"]
    name = Barra().get_factor_name(type_list=type_list)
    data = data[list(name.NAME_EN.values)]
    data.to_csv(out_file)
def update_data(self):
    """
    Refresh every input this object needs.

    Loads fund and security master data, then fund and index factor data for
    the last 20 trading days, then Barra data, and finally recomputes the
    exposure of indexes 000300.SH and 000905.SH over the same window.
    """
    # End of the update window is today, as a string
    end_date = Date().change_to_str(datetime.today())

    # Fund and security master data
    Fund().load_findb_fund_info()
    Fund().load_findb_sec_info()
    Fund().load_wind_fund_info()

    # Fund NAV data and index price data
    beg_date = Date().get_trade_date_offset(end_date, -20)
    Fund().load_fund_factor_all(beg_date, end_date)
    Index().load_index_factor_all(beg_date, end_date)

    # Barra data, then index exposures
    Barra().load_barra_data()
    beg_date = Date().get_trade_date_offset(end_date, -20)
    for index_code in ("000300.SH", "000905.SH"):
        Index().cal_index_exposure(index_code, beg_date=beg_date, end_date=end_date)
def load_data():
    """
    Run the periodic data refresh.

    Refreshes, in order: the trade-date series, the primary stock factors,
    Barra data (last 5 trading days), fund data (last 90 trading days), index
    factors (last 5) and static stock data (last 5). It ends with the shell
    command ``pause``.
    """
    print(" 更新本周数据 ")

    # Parameter: today as YYYYMMDD
    today = datetime.today().strftime("%Y%m%d")

    def days_back(n_days):
        # Trade date ``n_days`` trading days before today
        return Date().get_trade_date_offset(today, -n_days)

    # Trade dates (the date series itself is refreshed earlier in the day)
    Date().load_trade_date_series_all()

    # Stock factor data (h5 files downloaded from the network drive)
    Stock().load_h5_primary_factor()

    # Barra data
    Barra().update_barra(days_back(5), today)

    # Fund data: basic data, factor data, holdings
    Fund().update_fund_data(days_back(90), today)

    # Index factors (index weights are refreshed daily elsewhere);
    # this step consumes Wind quota
    Index().load_index_factor_all(days_back(5), today)

    # Static stock data such as stock pools and listing dates;
    # this step consumes Wind quota
    Stock().load_stock_static_data_all(days_back(5), today)

    os.system("pause")
def cal_index_exposure_date(self, index_code, date):
    """
    Compute the Barra exposure of an index on one date.

    Each factor exposure is the constituent-weight-weighted sum of the stock
    exposures divided by the total weight of the constituents.

    :param index_code: index code passed to ``IndexWeight``.
    :param date: date of the weights and of the Barra exposures.
    :return: one-row DataFrame indexed by ``date`` with one column per
        factor; an empty DataFrame when anything fails (the exception is
        printed).
    """
    type_list = ["STYLE", "COUNTRY", "INDUSTRY"]
    print("Calculating Index %s Barra Exposure at %s" % (index_code, date))

    try:
        weight = IndexWeight().get_weight_date(index_code, date)
        exposure = Barra().get_factor_exposure_date(date, type_list)

        data = pd.concat([weight, exposure], axis=1)
        data = data.dropna(subset=["WEIGHT"])
        total_weight = data['WEIGHT'].sum()

        # The original ran this computation twice in a row with identical
        # inputs; once is enough.
        res = pd.DataFrame([], columns=exposure.columns, index=[date])
        for risk_factor_name in exposure.columns:
            weighted_sum = (data["WEIGHT"] * data[risk_factor_name]).sum()
            res.loc[date, risk_factor_name] = weighted_sum / total_weight
    except Exception as e:
        # Best effort: report the cause and hand back an empty frame.
        print(e)
        res = pd.DataFrame([])

    return res
def update_data(self):
    """
    Refresh the data needed for attribution.

    The window ends on the previous trading day and starts 25 trading days
    before that. Barra data and fund NAVs are loaded, every fund's holding
    exposure is recomputed over the window, the mixed index weights are
    rebuilt for each day, and the exposures of four indexes are recomputed.
    """
    today = datetime.today().strftime("%Y%m%d")
    today = Date().get_trade_date_offset(today, -1)
    beg_date = Date().get_trade_date_offset(today, -25)

    Barra().load_barra_data()
    MfcData().cal_mfc_private_fund_nav_all()
    MfcData().load_mfc_public_fund_nav()

    param = MfcData().get_mfc_fund_info()
    param.index = param.Name

    # The index is captured once, exactly as range(len(param)) fixed the count
    for fund_name in list(param.index):
        MfcData().cal_mfc_holding_barra_exposure_period(fund_name, beg_date, today)

    for date in Date().get_trade_date_series(beg_date, today):
        Index().make_weight_mixed(date)

    Index().load_index_factor_all(beg_date, today)

    index_codes = ("000300.SH", "000905.SH", "881001.WI", "中证500+创业板综+中小板综")
    for index_code in index_codes:
        Index().cal_index_exposure(index_code, beg_date, today)
def get_fund_holder_exposure(self, fund, type_list=["STYLE"]):
    """
    Read a fund's stored holding exposure, restricted to some factor types.

    :param fund: fund code; part of the CSV file name.
    :param type_list: Barra factor types whose columns are returned.
    :return: DataFrame with a string index and one column per factor.
    """
    folder = Parameter().get_read_file(self.holder_exposure_name)
    csv_path = os.path.join(folder, 'Fund_Holder_Exposure_' + fund + '.csv')

    frame = pd.read_csv(csv_path, index_col=[0], encoding='gbk')
    frame.index = frame.index.map(str)

    wanted = list(Barra().get_factor_name(type_list=type_list)["NAME_EN"].values)
    return frame[wanted]
def StockBarraDecomposeReturnQuarter(
        report_date,
        out_path='E:\\3_Data\\4_fund_data\\7_fund_select_stock\\'):
    """
    Split every stock's return around a date into Barra components.

    Over the window from 20 trading days before ``report_date`` to 20 trading
    days after it, the daily residual return ("Alpha") and risk-factor
    returns are summed per stock. The STYLE and INDUSTRY factor columns are
    totalled into "Style" and "Industry", and "All" is the sum of
    ChinaEquity, Industry, Style and Alpha. The table is written to CSV.

    :param report_date: date the window is centred on; it is concatenated
        into the file name, so it has to be a string.
    :param out_path: root output folder; defaults to the location that used
        to be hard-coded in the body.
    :return: None; the result is written to disk.
    """
    T = 20
    beg_date = Date().get_trade_date_offset(report_date, -T)
    end_date = Date().get_trade_date_offset(report_date, T)
    date_series = Date().get_trade_date_series(beg_date, end_date)
    residual = Barra().get_stock_residual_return()

    daily_returns = {}
    for date in date_series:
        residual_date = residual.loc[date, :]
        riskfactor_date = Barra().get_stock_riskfactor_return_date(date)
        residual_date = pd.DataFrame(residual_date.values,
                                     index=residual_date.index,
                                     columns=["Alpha"])
        daily_returns[date] = pd.concat([residual_date, riskfactor_date], axis=1)

    # Replacement for pd.Panel(result).sum(axis=0): stack the daily tables
    # under a (date, stock) index and add them up per stock.
    pct_sum = pd.concat(daily_returns, axis=0).groupby(level=1).sum()

    barra_name = Barra().get_factor_name(['STYLE'])
    barra_name = list(barra_name['NAME_EN'].values)
    pct_sum['Style'] = pct_sum.loc[:, barra_name].sum(axis=1)

    barra_name = Barra().get_factor_name(['INDUSTRY'])
    barra_name = list(barra_name['NAME_EN'].values)
    pct_sum['Industry'] = pct_sum.loc[:, barra_name].sum(axis=1)

    pct_sum['All'] = pct_sum[['ChinaEquity', 'Industry', 'Style', 'Alpha']].sum(axis=1)

    print(" StockBarraDecomposeReturnQuarter %s" % report_date)
    file = os.path.join(out_path, "StockBarraDecomposeReturnQuarter",
                        "StockBarraDecomposeReturnQuarter" + report_date + '.csv')
    pct_sum.to_csv(file)
def get_data_date(self, date):
    """
    Assemble the alpha, industry, style and free-market-value data of a date.

    The alpha column is the latest column of ``self.alpha_data`` whose label
    is not after ``date``. As a side effect the industry, style and combined
    risk factor names are stored on the instance.

    :param date: date label used for the lookups.
    :return: tuple ``(alpha, industry exposure, standardized style exposure,
        free market value)``, each a DataFrame.
    """
    # Alpha: most recent column at or before the date
    eligible_dates = [d for d in list(self.alpha_data.columns) if d <= date]
    alpha_date = pd.DataFrame(self.alpha_data[max(eligible_dates)])
    alpha_date.columns = [self.alpha_factor_name]

    # Industry exposure
    risk_factor_name = []
    barra_industry_date = Barra().get_factor_exposure_date(
        date=date, type_list=['INDUSTRY'])
    industry_columns = barra_industry_date.columns
    risk_factor_name.extend(industry_columns)
    self.industry_factor_name = industry_columns
    # Same list object is extended below, so the attribute sees the styles too
    self.risk_factor_name = risk_factor_name

    # Style exposure, standardized
    barra_style_date = Barra().get_factor_exposure_date(
        date=date, type_list=['STYLE'])
    barra_style_date = FactorPreProcess().standardization(barra_style_date)
    style_columns = barra_style_date.columns
    risk_factor_name.extend(style_columns)
    self.style_factor_name = style_columns

    # Free market value
    free_mv_date = pd.DataFrame(self.free_mv_data[date])
    free_mv_date.columns = ['FreeMv']

    return alpha_date, barra_industry_date, barra_style_date, free_mv_date
def cal_fund_holder_risk_alpha_return_quarter(self, fund, end_date):
    """
    Split a fund's return using its quarterly holding exposure.

    The daily factor returns are multiplied by the fund's daily exposure,
    summed into style, industry and country contributions, and the remainder
    of the fund return is reported as alpha. The result is merged into the
    CSV file of this fund and end date.

    :param fund: fund code.
    :param end_date: last date of the computation; part of the file name, so
        it has to be a string.
    :return: DataFrame of the freshly computed rows, or ``None`` when the
        fund has no quarterly exposure.
    """
    beg_date = "20040101"
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']

    fund_exposure = FundHolderExposureQuarter(
    ).get_fund_holder_exposure_quarter_daily(fund, beg_date, end_date)
    # Bail out before the factor returns and fund NAVs are loaded
    if fund_exposure is None:
        return None

    barra_riskfactor_return = Barra().get_factor_return(
        beg_date, end_date, type_list=type_list)
    date_series = Date().get_trade_date_series(beg_date, end_date)
    fund_pct = FundFactor().get_fund_factor("Repair_Nav_Pct",
                                            fund_pool=[fund],
                                            date_list=date_series)
    fund_pct.columns = ["FundReturn"]

    fund_riskfactor_return = barra_riskfactor_return.mul(fund_exposure)
    fund_return = pd.concat([fund_pct, fund_riskfactor_return], axis=1)
    fund_return = fund_return.dropna()

    # One aggregated column per factor type
    aggregates = (("StyleReturn", "STYLE"),
                  ("IndustryReturn", "INDUSTRY"),
                  ("CountryReturn", "COUNTRY"))
    for column, factor_type in aggregates:
        barra_factor_name = list(
            Barra().get_factor_name(type_list=[factor_type])["NAME_EN"].values)
        fund_return[column] = fund_return[barra_factor_name].sum(axis=1)

    fund_return["SumReturn"] = fund_return[
        ["StyleReturn", "IndustryReturn", "CountryReturn"]].sum(axis=1)
    fund_return["AlphaReturn"] = fund_return["FundReturn"] - fund_return["SumReturn"]
    data_new = fund_return.dropna()

    # Merge with previously stored rows
    out_path = os.path.join(self.data_path_exposure,
                            'fund_holding_risk_alpha_return_quarter')
    out_file = os.path.join(
        out_path,
        'Fund_Holder_Risk_Alpha_Return_Quarter_' + fund + "_" + end_date + '.csv')
    print(out_file)

    if os.path.exists(out_file):
        data_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        data_old.index = data_old.index.map(str)
        params = FactorOperate().pandas_add_row(data_old, data_new)
    else:
        params = data_new
    params.to_csv(out_file)
    return data_new
def get_index_exposure_date(self, index_code, date, type_list=["STYLE"]):
    """
    Read the stored Barra exposure of an index on one date.

    :param index_code: index code; part of the CSV file name and the row
        label of the result.
    :param date: any date; it is mapped with a trade-date offset of 0.
    :param type_list: Barra factor types whose columns are returned (the
        default list is never modified here).
    :return: one-row DataFrame labelled ``index_code`` with one column per
        factor; an empty DataFrame when reading fails.
    """
    try:
        date = Date().get_trade_date_offset(date, 0)
        out_file = os.path.join(self.path,
                                "Index_Barra_Exposure_" + index_code + '.csv')
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)

        factor_name = Barra().get_factor_name(type_list=type_list)
        factor_name = list(factor_name["NAME_EN"].values)

        exposure_date = data.loc[date, factor_name]
        exposure_date = pd.DataFrame(exposure_date.values,
                                     index=exposure_date.index,
                                     columns=[index_code]).T
    except Exception as e:
        # Exception, not a bare except: Ctrl-C and SystemExit must propagate
        print("读取出现问题")
        print(e)
        exposure_date = pd.DataFrame([])

    return exposure_date
def get_index_exposure(self, index_code, beg_date, end_date, type_list=["STYLE"]):
    """
    Read the stored Barra exposure of an index over a period.

    :param index_code: index code; part of the CSV file name.
    :param beg_date: first row label of the slice (string index).
    :param end_date: last row label of the slice, inclusive.
    :param type_list: Barra factor types whose columns are returned (the
        default list is never modified here).
    :return: DataFrame of the selected rows and factor columns; an empty
        DataFrame when reading fails (the exception is printed).
    """
    try:
        out_file = os.path.join(self.data_path_exposure,
                                "Index_Barra_Exposure_" + index_code + '.csv')
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)

        factor_name = Barra().get_factor_name(type_list=type_list)
        factor_name = list(factor_name["NAME_EN"].values)

        # Label-based slice on the string index
        exposure = data.loc[beg_date:end_date, factor_name]
    except Exception as e:
        print("读取出现问题")
        print(e)
        exposure = pd.DataFrame([])

    return exposure
def get_fund_holder_exposure_halfyear(self, fund_code, type_list=['STYLE', 'COUNTRY', 'INDUSTRY']):
    """
    Read all stored half-year holding exposures of one fund.

    Note: the stored exposure is computed on the actual stock position, not
    rescaled to a fully invested portfolio.

    :param fund_code: fund code; part of the CSV file name.
    :param type_list: Barra factor types whose columns are returned.
    :return: DataFrame with a string index and one column per factor; an
        empty DataFrame when the file cannot be read or a column is missing.
    """
    csv_name = 'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code
    csv_path = os.path.join(self.halfyear_exposure_path, csv_name)

    try:
        frame = pd.read_csv(csv_path, index_col=[0], encoding='gbk')
        frame.index = frame.index.map(str)
        wanted = list(Barra().get_factor_name(type_list=type_list)["NAME_EN"].values)
        return frame[wanted]
    except Exception:
        return pd.DataFrame([])
def get_fund_holder_exposure_quarter(self, fund, type_list=['STYLE', 'COUNTRY', 'INDUSTRY']):
    """
    Read the stored quarterly holding exposure of one fund.

    The quarterly figures are based on the top ten holdings.

    :param fund: fund code; part of the CSV file name.
    :param type_list: Barra factor types whose columns are returned.
    :return: DataFrame with a string index and one column per factor, or
        ``None`` when the file cannot be read or a column is missing.
    """
    folder = os.path.join(self.data_path_exposure, 'fund_holding_exposure_quarter')
    csv_path = os.path.join(folder, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

    try:
        frame = pd.read_csv(csv_path, index_col=[0], encoding='gbk')
        frame.index = frame.index.map(str)
        wanted = list(Barra().get_factor_name(type_list=type_list)["NAME_EN"].values)
        return frame[wanted]
    except Exception:
        return None
def get_mfc_holding_barra_exposure_date(self, fund_name, date, type_list=["STYLE"]):
    """
    Read one fund's stored Barra exposure on one day.

    :param fund_name: fund name; part of the CSV file name and the row label
        of the result.
    :param date: date, converted to a string before the lookup.
    :param type_list: Barra factor types whose columns are returned.
    :return: one-row DataFrame labelled ``fund_name``; an empty DataFrame
        when the date or a factor column is missing (the error is printed).
        Failures while reading the file itself are not caught.
    """
    date = Date().change_to_str(date)
    csv_path = os.path.join(self.exposure_data_path,
                            "MfcRiskExposure_" + fund_name + '.csv')

    stored = pd.read_csv(csv_path, encoding='gbk', index_col=[0])
    stored.index = stored.index.map(str)
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

    try:
        row = stored.loc[date, barra_name]
        return pd.DataFrame(row.values, index=row.index, columns=[fund_name]).T
    except Exception as e:
        print(e)
        return pd.DataFrame([])
def cal_fund_holder_exposure_halfyear(self, fund_code, beg_date, end_date):
    """
    Compute and store one fund's half-year holding exposure.

    Note: the exposure is computed on the actual stock position (weights are
    not rescaled to a fully invested portfolio). The country factor is
    overwritten with the fund's reported stock ratio.

    :param fund_code: fund code, e.g. "000001.OF".
    :param beg_date: first date; replaced by "20040101" when no output file
        exists yet for this fund.
    :param end_date: last date.
    :return: None. The merged table is written to
        ``Fund_Holder_Exposure_HalfYear_<fund_code>.csv``; nothing is written
        when the fund has no holding data.
    """
    type_list = ['COUNTRY', 'STYLE', 'INDUSTRY']
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
    out_file = os.path.join(
        self.halfyear_exposure_path,
        'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code)

    # First run for this fund: rebuild the whole history
    if not os.path.exists(out_file):
        beg_date = "20040101"

    date_series = Date().get_normal_date_series(beg_date, end_date, period='S')
    fund_holding = FundHolder().get_fund_stock_weight_halfyear(fund_code)
    if fund_holding is None:
        return None

    date_series = list(set(date_series) & set(fund_holding.columns))
    date_series.sort()
    print(date_series)

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)
        print("Calculate HalfYear Holder Exposure %s %s" % (fund_code, report_date))

        barra_exposure = Barra().get_factor_exposure_date(trade_date, type_list)

        # Start from an empty row; it stays empty when data is missing
        exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])

        # The original re-read the holdings on every date only to test their
        # length; the table loaded above is reused instead.
        if (barra_exposure is not None) and (len(fund_holding) > 0):
            fund_holding_date = pd.DataFrame(fund_holding[report_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date = fund_holding_date.sort_values(
                by=[report_date], ascending=False)
            fund_holding_date.columns = ["Weight"]
            fund_holding_date /= 100.0  # divide the stored weights by 100

            data = pd.concat([fund_holding_date, barra_exposure], axis=1)
            data = data.dropna()

            if len(data) > 0:
                for factor_name in barra_name:
                    exp = (data['Weight'] * data[factor_name]).sum()
                    exposure_add.loc[report_date, factor_name] = exp

                # Country exposure is the reported stock position
                country_name = Barra().get_factor_name(
                    ["COUNTRY"])["NAME_EN"].values[0]
                position = FundFactor().get_fund_factor(
                    "Stock_Ratio", date_list=[report_date], fund_pool=[fund_code])
                exposure_add.loc[report_date, country_name] = position.values[0][0] / 100

        exposure_rows.append(exposure_add)

    if exposure_rows:
        exposure_new = pd.concat(exposure_rows, axis=0)
    else:
        exposure_new = pd.DataFrame([])

    # Merge with previously stored rows
    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new

    if len(params) > 0:
        params = params[barra_name]
    params.to_csv(out_file)
def get_risk_barra_industry_country_exposure_date(self, date):
    """
    Return the Barra INDUSTRY factor exposure of a date.

    NOTE(review): the name mentions "country", but only the INDUSTRY type is
    requested - confirm which one is intended.

    :param date: date of the exposure.
    :return: whatever ``Barra().get_factor_exposure_date`` returns.
    """
    return Barra().get_factor_exposure_date(date=date, type_list=["INDUSTRY"])
def get_risk_barra_country_exposure_date(self, date):
    """
    Return the Barra COUNTRY factor exposure of a date.

    :param date: date of the exposure.
    :return: whatever ``Barra().get_factor_exposure_date`` returns.
    """
    return Barra().get_factor_exposure_date(date=date, type_list=['COUNTRY'])
def get_risk_barra_style_exposure_date(self, date):
    """
    Return the Barra STYLE factor exposure of a date.

    :param date: date of the exposure.
    :return: whatever ``Barra().get_factor_exposure_date`` returns.
    """
    return Barra().get_factor_exposure_date(date=date, type_list=['STYLE'])
def opt_date(self, end_date, end_last_date, turnover_control=False):
    """
    Optimize the fund weights for a single period.

    Constraints:
    1. style exposure stays within ``self.style_deviate`` of the benchmark
       median;
    2. stock position stays within ``self.position_deviate`` of the
       benchmark median;
    3. every fund is capped at ``self.fund_up_ratio``;
    4. optional turnover limit against the previous period's weights;
    5. at most two funds per management company (not enforced);
    6. number of funds (not enforced).

    Weights below 2% are zeroed after the solve and the rest rescaled. The
    selected funds and the exposure summary are written to
    ``fund_opt/FundOpt_<end_date>.csv``.

    :param end_date: date of the factor file to optimize on.
    :param end_last_date: date of the previous optimization result; only
        read when ``turnover_control`` is true.
    :param turnover_control: add the turnover constraint when true.
    :return: None.
    """
    print("Project Nice Stock Fund Optimization Fund Weight at %s ......" % end_date)

    # Fund pool
    path = os.path.join(self.data_path, 'fund_factor')
    file = os.path.join(path, 'FundFactor_%s.csv' % end_date)
    data = pd.read_csv(file, index_col=[0], encoding='gbk')

    fund_benchmark_list = Fund().get_fund_pool_code("20181231", self.fund_pool_name)
    fund_benchmark_list = list(set(fund_benchmark_list) & set(data.index))
    fund_benchmark_list.sort()
    data = data.loc[fund_benchmark_list, :]

    # Column names
    barra_style_list = Barra().get_factor_name(type_list=['STYLE'])['NAME_EN'].values
    position_list = Barra().get_factor_name(type_list=['COUNTRY'])['NAME_EN'].values
    risk_factor_list = Barra().get_factor_name(type_list=['COUNTRY', "STYLE"])['NAME_EN'].values
    alpha_col = [self.alpha_column]
    use_col = list(alpha_col)
    use_col.extend(risk_factor_list)

    # Keep only funds with alpha and every risk factor available
    data = data.dropna(subset=use_col)
    alpha_values = data[alpha_col].values
    data['UpRatio'] = self.fund_up_ratio
    weight_up_values = data['UpRatio'].values

    # Benchmark
    fund_benchmark_list = Fund().get_fund_pool_code("20181231", self.benchmark_name)
    fund_benchmark_list = list(set(fund_benchmark_list) & set(data.index))
    fund_benchmark_list.sort()

    # Values fed to the optimizer
    stock_style_values = data[barra_style_list].values
    stock_position_values = data[position_list].values
    bench_style_values = data.loc[fund_benchmark_list, barra_style_list].median().values
    bench_position_values = data.loc[fund_benchmark_list, position_list].median().values
    bench_style_up_values = bench_style_values.T + self.style_deviate
    bench_style_low_values = bench_style_values.T - self.style_deviate
    bench_position_up_values = bench_position_values.T + self.position_deviate
    bench_position_low_values = bench_position_values.T - self.position_deviate

    # NOTE(review): the collapsed source does not show where the original
    # else-branch ended; everything below is assumed to belong to it.
    if len(data) == 0:
        print("Project Nice Stock Fund Length of Fund is %s at %s ...... is Zero" % (len(data), end_date))
        return

    print("Project Nice Stock Fund Length of Fund is %s at %s ......" % (len(data), end_date))
    w = cvx.Variable(len(data))

    constraints = [cvx.sum(w) == 1,
                   w >= 0,
                   w <= weight_up_values]

    if turnover_control:
        try:
            file = os.path.join(self.data_path, 'fund_opt', 'FundOpt_%s.csv' % end_last_date)
            weight_last = pd.read_csv(file, index_col=[0], encoding='gbk')
            weight_last = weight_last.dropna(subset=[self.alpha_column])

            if len(weight_last) == 0:
                # No previous weights: start from zero, turnover unconstrained
                weight_last = pd.Series(0.0, index=data.index)
                turnover = 2.00
            else:
                # reindex, not .loc: funds missing from the previous file
                # get weight 0 instead of raising KeyError on current
                # pandas, which used to disable the turnover limit silently
                weight_last = weight_last["Weight"].reindex(data.index)
                weight_last = weight_last.fillna(0.0)
                turnover = self.turnover
        except Exception as e:
            print(e)
            weight_last = pd.Series(0.0, index=data.index)
            turnover = 2.00

        print(len(weight_last))
        weight_last_values = weight_last.values
        constraints.append(cvx.sum(cvx.abs(w - weight_last_values)) <= turnover)

    constraints.extend([
        stock_style_values.T * w <= bench_style_up_values,
        stock_style_values.T * w >= bench_style_low_values,
        stock_position_values.T * w <= bench_position_up_values,
        stock_position_values.T * w >= bench_position_low_values,
    ])

    prob = cvx.Problem(cvx.Maximize(alpha_values.T * w), constraints)
    prob.solve()

    print("status:", prob.status)
    print("optimal value", prob.value)

    weight = pd.DataFrame(w.value, columns=['Weight'], index=data.index)
    # Drop positions below 2% and rescale the rest to sum to one
    weight['Weight'] = weight['Weight'].map(lambda x: 0.0 if x < 0.02 else x)
    weight['Weight'] /= weight['Weight'].sum()

    result_risk_exposure = weight.T.dot(data[risk_factor_list])
    result_risk_exposure = result_risk_exposure.T
    result_risk_exposure.columns = ['ResultExposure']

    up_risk_exposure = pd.DataFrame(data[risk_factor_list].median())
    up_risk_exposure.columns = ["UpExposure"]
    up_risk_exposure.loc[barra_style_list, "UpExposure"] += self.style_deviate
    up_risk_exposure.loc[position_list, "UpExposure"] += self.position_deviate

    low_risk_exposure = pd.DataFrame(data[risk_factor_list].median())
    low_risk_exposure.columns = ["LowExposure"]
    low_risk_exposure.loc[barra_style_list, "LowExposure"] -= self.style_deviate
    low_risk_exposure.loc[position_list, "LowExposure"] -= self.position_deviate

    bench_risk_exposure = pd.DataFrame(data[risk_factor_list].median())
    bench_risk_exposure.columns = ["BenchExposure"]

    exposure_result = pd.concat([bench_risk_exposure, up_risk_exposure,
                                 low_risk_exposure, result_risk_exposure], axis=1)

    data = pd.concat([data, weight], axis=1)
    data = data[data['Weight'] > 0.0]
    result = pd.concat([data, exposure_result.T], axis=0)

    col = ["SecName", "InvestType", "Corp", "SetupDate", "Weight",
           "RegressAlpha", "RegressAlphaIR"]
    col.extend(risk_factor_list)
    result = result[col]

    path = os.path.join(self.data_path, 'fund_opt')
    file = os.path.join(self.data_path, 'fund_opt', 'FundOpt_%s.csv' % end_date)
    if not os.path.exists(path):
        os.makedirs(path)
    result.to_csv(file)
for i in range(1, len(concat_data.columns)): col = concat_data.columns[i] corr.loc[col, 'Corr'] = concat_data.iloc[:, 0].corr(concat_data.loc[:, col]) return corr if __name__ == "__main__": # Data ########################################################################################## from quant.stock.barra import Barra from quant.project.multi_factor.alpha_model.exposure.alpha_factor import AlphaFactor name = 'alpha_raw_ep' date = "20171229" data_pandas = AlphaFactor().get_alpha_factor_exposure(name) factor_series = data_pandas[date] neutral_frame = Barra().get_factor_exposure_date( date, type_list=['STYLE', 'INDUSTRY']) params, t_values, factor_res = FactorNeutral().factor_exposure_neutral( factor_series, neutral_frame) print(params) print(factor_res) print(t_values) ##########################################################################################
def update_data_daily():
    """Run the daily Barra refresh by calling ``Barra().load_barra()``."""
    barra = Barra()
    barra.load_barra()
def cal_fund_holder_exposure_quarter(self, fund, beg_date, end_date):
    """
    Compute and store one fund's quarterly holding exposure.

    The quarterly figures are based on the top ten holdings. The country
    factor is overwritten with the fund's reported stock ratio.

    :param fund: fund code.
    :param beg_date: first date of the quarterly date series.
    :param end_date: last date of the quarterly date series.
    :return: None. The merged table is written to
        ``fund_holding_exposure_quarter/Fund_Holder_Exposure_Quarter_<fund>.csv``;
        nothing is written when the fund has no holding data.
    """
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
    date_series = Date().get_normal_date_series(beg_date, end_date, period='Q')
    fund_holding = FundHolder().get_fund_stock_weight_quarter(fund)
    if fund_holding is None:
        return None

    date_series = list(set(date_series) & set(fund_holding.columns))
    date_series.sort()

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_exposure = Barra().get_factor_exposure_date(trade_date, type_list)
        print("########## Calculate Quarter Holder Exposure %s %s ##########"
              % (fund, report_date))

        # Start from an empty row; it stays empty when data is missing
        exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])

        if barra_exposure is not None:
            fund_holding_date = pd.DataFrame(fund_holding[report_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date = fund_holding_date.sort_values(
                by=[report_date], ascending=False)
            fund_holding_date.columns = ["Weight"]

            data = pd.concat([fund_holding_date, barra_exposure], axis=1)
            data = data.dropna()

            if len(data) > 0:
                for factor_name in barra_name:
                    stock_exposure = data['Weight'] * data[factor_name]
                    # divide the weighted sum by 100, as the original did
                    exposure_add.loc[report_date, factor_name] = \
                        stock_exposure.sum() / 100.0

                # Country exposure is the reported stock position
                country_name = Barra().get_factor_name(
                    ["COUNTRY"])["NAME_EN"].values[0]
                position = FundFactor().get_fund_factor(
                    "Stock_Ratio", date_list=[report_date], fund_pool=[fund])
                position = position.values[0][0]
                exposure_add.loc[report_date, country_name] = position / 100

        exposure_rows.append(exposure_add)

    # The original left exposure_new undefined (NameError) when no report
    # date matched; fall back to an empty frame instead.
    if exposure_rows:
        exposure_new = pd.concat(exposure_rows, axis=0)
    else:
        exposure_new = pd.DataFrame([])

    # Merge with previously stored rows
    out_path = os.path.join(self.data_path_exposure, 'fund_holding_exposure_quarter')
    out_file = os.path.join(out_path, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new
    params.to_csv(out_file)
def update_data(self):
    """Refresh the inputs: primary stock factors first, then Barra data."""
    stock_loader = Stock()
    stock_loader.load_h5_primary_factor()

    barra_loader = Barra()
    barra_loader.load_barra_data()
def cal_fmp(self, fmp_name, type="Equal"):
    """
    Build the factor mimicking portfolio (FMP) of the alpha factor.

    For every rebalancing date except the last one, a quadratic program
    minimises ``x' P x`` subject to ``alpha' x = 1`` and ``sum(x) = 0``.

    type = 'Equal'          P is the identity matrix
    type = 'BarraStockCov'  P is Barra's estimated stock covariance matrix
    type = 'FreeMvSqrt'     diagonal of square-rooted free-float market value;
                            described by the original docstring but never
                            implemented, so it is rejected explicitly

    Dates with too few stocks are skipped. A date whose solve fails is kept
    as an empty row.

    :param fmp_name: label embedded in the output file name.
    :param type: form of the P matrix, see above.
    :raises ValueError: for an unsupported ``type`` (the original failed
        later on an unbound ``P``).
    :return: None; the weights are written to the ``fmp`` sub-folder.
    """
    supported_types = ('Equal', 'BarraStockCov')
    if type not in supported_types:
        raise ValueError("cal_fmp type %r is not implemented, use one of %s"
                         % (type, supported_types))

    fmp_frames = []

    for i_date in range(len(self.change_date_series) - 1):

        # Read and clean the alpha data
        date = self.change_date_series[i_date]
        alpha_date, industry_dummy_date, barra_style_date, free_mv_date = \
            self.get_data_date(date)

        alpha_date = alpha_date.dropna()
        alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
        alpha_date = FactorPreProcess().standardization(alpha_date)

        code_list = list(alpha_date.index)
        code_list.sort()
        alpha_date = alpha_date.loc[code_list, :]

        if type == 'BarraStockCov':
            stock_cov = Barra().get_stock_covariance(date)
            code_list = list(set(alpha_date.index) & set(stock_cov.index))
            code_list.sort()
            alpha_date = alpha_date.loc[code_list, :]
            stock_cov = stock_cov.loc[code_list, code_list]
            # NOTE(review): re-cleaning after the universe shrinks is assumed
            # to belong to this branch; the collapsed source does not show it.
            alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
            alpha_date = FactorPreProcess().standardization(alpha_date)

        # Skipped dates contribute nothing. The original could append a
        # stale frame from the previous date or hit an undefined name here.
        if len(alpha_date) <= self.min_stock_num:
            continue

        if type == 'Equal':
            P = np.diag(np.ones(shape=(1, len(alpha_date)))[0])
        else:
            P = stock_cov.values

        Q = np.zeros(shape=(P.shape[0], 1))
        A = np.column_stack(alpha_date.values)
        A_add = np.ones(shape=(1, P.shape[0]))
        A = np.vstack((A, A_add))  # row_stack is a deprecated alias of vstack
        b = np.array([[1.0], [0.0]])

        try:
            P = matrix(P)
            Q = matrix(Q)
            A = matrix(A)
            b = matrix(b)
            result = sol.qp(P, q=Q, A=A, b=b)
            fmp_raw_alpha = pd.DataFrame(np.array(result['x'][0:]),
                                         columns=[date], index=code_list).T
            print("########## factor mimicking portfolio At %s ##########" % date)
            concat_data = pd.concat([fmp_raw_alpha.T, alpha_date], axis=1)
            concat_data = concat_data.dropna()
            # Off-diagonal entry: correlation between the FMP weights and
            # alpha. The original printed [0][0], which is always 1.0.
            print(concat_data.corr().values[0][1])
        except Exception as e:
            print(e)
            fmp_raw_alpha = pd.DataFrame([], columns=[date], index=code_list).T
            print("########## Quadratic Programming FMP is InCorrect %s ##########" % date)

        fmp_frames.append(fmp_raw_alpha)

    if not fmp_frames:
        # Nothing could be computed; the original crashed here
        print("########## factor mimicking portfolio is Empty ##########")
        return

    fmp_raw_alpha_all = pd.concat(fmp_frames, axis=0)

    # Write data
    sub_path = os.path.join(self.path, 'fmp')
    file = os.path.join(
        sub_path, '%s_%s_%s.csv' % (self.alpha_factor_name, fmp_name, type))
    fmp_raw_alpha_all = fmp_raw_alpha_all.T
    fmp_raw_alpha_all.to_csv(file)
def cal_fund_regression_risk_alpha_return_style(self, fund, beg_date, end_date):
    """
    Split the return of ``fund`` into a Barra style/country part and an alpha
    part, then store the result as a CSV file.

    The regression exposure of the fund is multiplied element-wise with the
    Barra factor returns (STYLE and COUNTRY factors, scaled by 1/100). The
    row sum of these products is ``SumReturn`` and
    ``AlphaReturn = FundReturn - SumReturn``. After slicing to
    ``beg_date:end_date`` the cumulative compounded versions of the three
    return columns are added.

    Codes ending in 'OF' are read through ``FundFactor`` (value divided by
    100); every other code is read through ``Index`` (value used as
    returned).

    The result is written to
    ``<self.data_path_exposure>/fund_regression_risk_alpha_return_style/
    Fund_Regression_Risk_Alpha_Style_<fund>.csv``. If that file already
    exists it is combined with the new rows by
    ``FactorOperate().pandas_add_row`` (merge semantics are defined there).
    Nothing happens when no exposure is available for the fund.
    """
    style_exposure = FundRegressionExposureStyle(
    ).get_fund_regression_exposure_style(fund)
    if style_exposure is None:
        return

    # Barra factor returns restricted to STYLE + COUNTRY factors
    style_names = list(Barra().get_factor_name(['STYLE'])['NAME_EN'].values)
    country_names = list(Barra().get_factor_name(["COUNTRY"])['NAME_EN'].values)
    factor_names = style_names + country_names

    factor_return = Barra().get_factor_return(
        None, None, type_list=["INDUSTRY", "COUNTRY", "STYLE"])
    factor_return = factor_return[factor_names] / 100.0

    # return series of the fund (or of the index when the code is not a fund)
    if fund.endswith('OF'):
        fund_return = FundFactor().get_fund_factor(
            "Repair_Nav_Pct", None, [fund]) / 100.0
    else:
        fund_return = Index().get_index_factor(fund, attr=["PCT"])
    fund_return.columns = ["FundReturn"]

    # factor contribution = exposure * factor return
    style_exposure = style_exposure.dropna(how="all")
    exposure_return = factor_return.mul(style_exposure).dropna(how="all")

    merged = pd.concat([fund_return, exposure_return], axis=1).dropna(how="all")
    merged = merged.loc[exposure_return.index, :].dropna(subset=["FundReturn"])
    merged["SumReturn"] = merged[factor_names].sum(axis=1, skipna=True)
    merged["AlphaReturn"] = merged["FundReturn"] - merged["SumReturn"]
    merged = merged.loc[beg_date:end_date, :]

    # cumulative compounded returns, in the same column order as before
    cumulative_columns = (
        ("FundReturn", "CumFundReturn"),
        ("AlphaReturn", "CumAlphaReturn"),
        ("SumReturn", "CumSumReturn"),
    )
    for source_col, target_col in cumulative_columns:
        merged[target_col] = (merged[source_col] + 1.0).cumprod() - 1.0

    # merge with previously stored rows, if any, and write back
    out_dir = os.path.join(self.data_path_exposure,
                           'fund_regression_risk_alpha_return_style')
    file_name = 'Fund_Regression_Risk_Alpha_Style_' + fund + '.csv'
    out_file = os.path.join(out_dir, file_name)

    if not os.path.exists(out_file):
        params = merged
    else:
        stored = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        stored.index = stored.index.map(str)
        params = FactorOperate().pandas_add_row(stored, merged)

    print(params)
    params.to_csv(out_file)
def opt_date(self, date):
    """
    Optimise the stock weights for ``date``; the decision variable ``w`` is
    the weight relative to the benchmark (active weight).

    The problem handed to cvxpy is

        maximise    alpha' w
        subject to  sum(w) = 0
                    max(-bench_weight, -stock_deviate) <= w <= stock_deviate
                    |style_exposure' w| <= style_deviate
                    max(-bench_industry, -industry_deviate)
                        <= industry_exposure' w <= industry_deviate

    The lower bound ``w >= -bench_weight`` keeps the final weight
    ``bench_weight + w`` non-negative. The final weights are rescaled to sum
    to one, passed to ``self.generate_weight_file(weight, date, next_date)``
    and followed by ``self.analysis_date(date)``.

    Two further constraints exist only as disabled code and are kept here for
    reference:
        tracking error  cvx.quad_form(w, stock_cov.values) <= self.track_error ** 2
        turnover        cvx.sum(cvx.abs(w + bench_weight - last_weight)) <= turnover
                        (turnover = 2.0 when there is no previous weight,
                        otherwise self.double_turnover)
    Their inputs (previous weights, covariance values, turnover budget) are
    no longer prepared because nothing consumed them.

    Nothing is written when the tradable universe is empty or when the solver
    returns no solution.
    """
    # get data
    print("Opt Relative Weight At ", date)
    next_date = Date().get_trade_date_offset(date, 1)
    alpha_data = self.get_stock_alpha_date(date)
    stock_risk_exposure = self.get_stock_risk_exposure_date(date)
    weight_bench = self.get_benchmark_weight_date(date)
    bench_risk_exposure = self.get_benchmark_risk_exposure_date(date)
    stock_cov = self.get_stock_covariance_date(date)

    # Tradable universe: tradable on the next day and covered by both the
    # exposure and the covariance data. Sorted so the row order (and the
    # written file) is reproducible instead of following set iteration order.
    stock_can_trade = self.get_can_trade_stock_date(next_date)
    stock_can_trade = sorted(
        set(stock_can_trade)
        & set(stock_risk_exposure.index)
        & set(stock_cov.index))

    # Stocks whose exposures are entirely missing are removed from the
    # universe itself; otherwise the exposure matrices would have fewer rows
    # than the decision variable.
    stock_risk_exposure = stock_risk_exposure.loc[stock_can_trade, :]
    stock_risk_exposure = stock_risk_exposure.dropna(how='all')
    stock_can_trade = list(stock_risk_exposure.index)
    stock_risk_exposure = stock_risk_exposure.fillna(
        stock_risk_exposure.mean())

    n = len(stock_can_trade)
    if n == 0:
        print("Opt Relative Weight At %s : no tradable stock" % date)
        return

    # Alpha and benchmark weight do not necessarily cover every tradable
    # stock. reindex() inserts NaN for missing labels (filled right after),
    # whereas .loc with missing labels raises KeyError on current pandas.
    alpha_data = alpha_data.reindex(stock_can_trade)
    alpha_data = alpha_data.fillna(alpha_data.mean())
    weight_bench = weight_bench.reindex(stock_can_trade)
    weight_bench = weight_bench.fillna(0.0)
    weight_bench /= weight_bench.sum()

    # values
    alpha_values = alpha_data.values
    # weight_bench is treated as a single-column frame (it is relabelled with
    # exactly one column name further down).
    weight_bench_values = weight_bench.iloc[:, 0].values
    weight_up_values = np.full(n, self.stock_deviate)
    weight_low_values = np.maximum(-weight_bench_values, -self.stock_deviate)

    # limit of style
    style_columns = list(
        Barra().get_factor_name(type_list=['STYLE'])['NAME_EN'].values)
    stock_style_values = stock_risk_exposure[style_columns].values

    # limit of industry
    industry_columns = list(
        Barra().get_factor_name(type_list=['INDUSTRY'])['NAME_EN'].values)
    stock_industry_values = stock_risk_exposure[industry_columns].values
    bench_industry_values = np.asarray(
        bench_risk_exposure[industry_columns].values[0], dtype=float)
    bench_industry_low_values = np.maximum(-bench_industry_values,
                                           -self.industry_deviate)

    # opt
    w = cvx.Variable(n)
    constraints = [
        cvx.sum(w) == 0,
        w >= weight_low_values,
        w <= weight_up_values,
        stock_style_values.T * w <= self.style_deviate,
        stock_style_values.T * w >= -self.style_deviate,
        stock_industry_values.T * w <= self.industry_deviate,
        stock_industry_values.T * w >= bench_industry_low_values,
    ]
    prob = cvx.Problem(cvx.Maximize(alpha_values.T * w), constraints)
    prob.solve()
    print("status:", prob.status)
    print("optimal value", prob.value)

    if w.value is None:
        # No solution was produced; writing it out would store an all-NaN
        # weight file.
        print("Opt Relative Weight At %s : no solution, weight file not written"
              % date)
        return

    # weight = benchmark weight + active weight, rescaled to sum to one
    weight_bench.columns = ['Weight']
    weight_active = pd.DataFrame(w.value, columns=['Weight'],
                                 index=stock_can_trade)
    weight = weight_active.add(weight_bench)
    weight /= weight.sum()
    print("优化结果", len(weight))
    self.generate_weight_file(weight, date, next_date)
    self.analysis_date(date)