def __init__(self):
    """
    Call ``__init__`` of FundHolder, FundFactor, FundPool, FundStatic and
    FundExposure on this instance, in that order.
    """
    # NOTE(review): these are presumably the base classes of this class;
    # the class header is not visible here, so confirm.
    initialisers = (FundHolder, FundFactor, FundPool, FundStatic, FundExposure)
    for owner in initialisers:
        owner.__init__(self)
def cal_weight_date(self, quarter_date):
    """
    Sum the asset-scaled quarterly stock holdings of every fund in the
    "普通股票型基金" pool.

    For each fund, the holding column for ``quarter_date`` is multiplied by
    ``self.total_asset.loc[fund, quarter_date] / 1e8``; 0.5 is used when
    that lookup raises.  A fund whose holdings cannot be read contributes
    an empty column.  The per-stock sums are returned without any
    normalisation.

    :param quarter_date: label of the quarter; used as the column key into
        each fund's holdings and into ``self.total_asset``
    :return: one-column DataFrame (column ``0``) of per-stock sums; it has
        no rows when the pool is empty or no fund has usable holdings
    """
    fund_pool = FundPool().get_fund_pool_code(name="普通股票型基金",
                                              date=quarter_date)

    fund_columns = []
    for fund in fund_pool:
        # Best effort: a fund without an asset figure falls back to 0.5.
        try:
            asset = self.total_asset.loc[fund, quarter_date] / 100000000
        except Exception:
            asset = 0.5

        # Best effort: unreadable holdings become an empty column.
        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date *= asset
            fund_holding_date.columns = [fund]
        except Exception:
            fund_holding_date = pd.DataFrame([], columns=[fund])
        fund_columns.append(fund_holding_date)

    # Concatenate once instead of once per fund.  An empty pool gives an
    # empty frame instead of an unbound-variable error.
    if fund_columns:
        stock_data = pd.concat(fund_columns, axis=1)
    else:
        stock_data = pd.DataFrame()

    stock_data = stock_data.dropna(how='all')
    return pd.DataFrame(stock_data.sum(axis=1))
def cal_quarter_holding_allfund_quarter(self, quarter_date):
    """
    Compute the average stock holding of the benchmark fund pool at a
    quarterly report date and write it to CSV.

    Steps, as implemented below:

    1. Take the "基金持仓基准基金池" pool for ``quarter_date``.
    2. Look up fund turnover at the last half-year date relative to
       ``quarter_date`` shifted by 15 trade dates; turnover values below
       15 are discarded.
    3. Keep the half of the remaining funds with the lowest turnover.
    4. Sum their holdings for ``quarter_date`` with equal weight per fund
       and normalise the per-stock sums so they add up to one.
    5. Write ``<data_weight_path>/<name>/<quarter_date>_QuarterHolding.csv``
       with index ``CODE`` and column ``WEIGHT``.

    :param quarter_date: quarter label (string; it is concatenated into the
        output file name)
    :return: None
    """
    fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池",
                                              date=quarter_date)
    halfyear_date = Date().get_last_fund_halfyear_date(
        Date().get_trade_date_offset(quarter_date, 15))

    # Restrict the pool to the lower-turnover half of funds with turnover >= 15.
    fund_turnover = Fund().get_fund_turnover()
    fund_turnover = fund_turnover.loc[fund_pool, :]
    fund_turnover[fund_turnover < 15] = np.nan
    fund_turnover_date = pd.DataFrame(fund_turnover[halfyear_date])
    fund_turnover_date = fund_turnover_date.dropna()
    fund_turnover_date = fund_turnover_date.sort_values(by=[halfyear_date],
                                                        ascending=True)
    fund_pool = list(
        fund_turnover_date.index[0:int(len(fund_turnover_date) / 2)])

    fund_columns = []
    for fund in fund_pool:
        # Best effort: unreadable holdings become an empty column.
        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date *= 1.0  # equal weight per fund
            fund_holding_date.columns = [fund]
        except Exception:
            fund_holding_date = pd.DataFrame([], columns=[fund])
        fund_columns.append(fund_holding_date)

    # Concatenate once instead of once per fund.  An empty pool gives an
    # empty frame instead of an unbound-variable error.
    if fund_columns:
        stock_data = pd.concat(fund_columns, axis=1)
    else:
        stock_data = pd.DataFrame()

    stock_data = stock_data.dropna(how='all')
    stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
    stock_data_weight.columns = ["WEIGHT"]
    stock_data_weight /= stock_data_weight.sum()
    stock_data_weight.index.name = "CODE"

    sub_path = os.path.join(self.data_weight_path, self.name)
    if not os.path.exists(sub_path):
        os.makedirs(sub_path)
    file = os.path.join(sub_path, quarter_date + '_QuarterHolding.csv')
    stock_data_weight.to_csv(file)
def cal_all_wind_file(self):
    """
    Generate the Wind portfolio files for ``self.fund_code``.

    For every date of the "S" date series from "20150101" to today, the
    fund's half-year holding column is turned into a table with columns
    ``Weight`` (holding / 100, plus a ``Cash`` row holding the remainder
    to 1), ``CreditTrading``, ``Date``, ``Price`` and ``Direction``, and
    written to ``<wind_port_path>/<port_name>/<port_name>_<publish_date>.csv``.

    A date that fails for any reason is skipped; the failure is printed
    instead of being swallowed silently.

    :return: None
    """
    date_series = Date().get_normal_date_series("20150101",
                                                datetime.today(), "S")
    fund_holding = FundHolder().get_fund_holding_halfyear(
        fund=self.fund_code)

    for half_year_date in date_series:
        publish_date = Date().get_trade_date_offset(half_year_date, 0)
        try:
            fund_holding_date = pd.DataFrame(fund_holding[half_year_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date.columns = ["Weight"]
            fund_holding_date = fund_holding_date.sort_values(
                by=['Weight'], ascending=False)
            fund_holding_date["Weight"] /= 100.0
            # Whatever is not held in stocks is booked as cash.
            fund_holding_date.loc['Cash', 'Weight'] = \
                1 - fund_holding_date["Weight"].sum()
            fund_holding_date.index.name = "Code"
            fund_holding_date["CreditTrading"] = "No"
            fund_holding_date["Date"] = publish_date
            fund_holding_date["Price"] = 0.0
            fund_holding_date["Direction"] = "Long"

            sub_path = os.path.join(self.wind_port_path, self.port_name)
            if not os.path.exists(sub_path):
                os.makedirs(sub_path)
            file = os.path.join(
                sub_path, '%s_%s.csv' % (self.port_name, publish_date))
            fund_holding_date.to_csv(file)
        except Exception as err:
            # Still best effort per date (e.g. no holdings reported for it),
            # but a failed write or a data problem is now visible.
            print("Skip wind file for %s: %r" % (half_year_date, err))
def cal_fund_holder_exposure_halfyear(self, fund_code, beg_date, end_date):
    """
    Compute the half-year holding exposure of a single fund to the Barra
    COUNTRY / STYLE / INDUSTRY factors and merge it into the fund's CSV.

    For every half-year date that has a holdings column, the exposure to a
    factor is ``sum(weight * factor_exposure)`` over the stocks present in
    both tables, with the holding weights divided by 100 and not rescaled
    to sum to one (i.e. a non-fully-invested exposure).  The country factor
    is then overwritten with the fund's "Stock_Ratio" / 100.  Dates without
    usable data yield a NaN row.

    If the output file does not exist yet, ``beg_date`` is replaced by
    "20040101".  New rows are merged into the existing file through
    ``FactorOperate().pandas_add_row``; nothing is written when the merged
    result has no rows.

    Example arguments: "000001.OF", "20170101", "20190101".

    :param fund_code: fund identifier, also used in the output file name
    :param beg_date: first date of the half-year date series
    :param end_date: last date of the half-year date series
    :return: None
    """
    type_list = ['COUNTRY', 'STYLE', 'INDUSTRY']
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
    out_file = os.path.join(
        self.halfyear_exposure_path,
        'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code)

    if not os.path.exists(out_file):
        beg_date = "20040101"

    date_series = Date().get_normal_date_series(beg_date, end_date,
                                                period='S')
    fund_holding = FundHolder().get_fund_stock_weight_halfyear(fund_code)
    if fund_holding is None:
        return None

    # Only dates for which a holdings column exists can be computed.
    date_series = sorted(set(date_series) & set(fund_holding.columns))
    print(date_series)

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)
        print("Calculate HalfYear Holder Exposure %s %s" %
              (fund_code, report_date))

        barra_exposure = Barra().get_factor_exposure_date(
            trade_date, type_list)

        # NaN row by default; filled in only when both inputs overlap.
        exposure_add = pd.DataFrame([], columns=barra_name,
                                    index=[report_date])

        # The holdings fetched above are reused here instead of being
        # re-read on every date.
        if (barra_exposure is not None) and (len(fund_holding) > 0):
            fund_holding_date = pd.DataFrame(fund_holding[report_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date = fund_holding_date.sort_values(
                by=[report_date], ascending=False)
            fund_holding_date.columns = ["Weight"]
            fund_holding_date /= 100.0

            data = pd.concat([fund_holding_date, barra_exposure], axis=1)
            data = data.dropna()

            if len(data) > 0:
                for factor_name in barra_name:
                    exp = (data['Weight'] * data[factor_name]).sum()
                    # .loc replaces the removed DataFrame.ix indexer.
                    exposure_add.loc[report_date, factor_name] = exp

                # The country exposure is the fund's stock position.
                country_name = Barra().get_factor_name(
                    ["COUNTRY"])["NAME_EN"].values[0]
                position = FundFactor().get_fund_factor(
                    "Stock_Ratio", date_list=[report_date],
                    fund_pool=[fund_code])
                exposure_add.loc[report_date, country_name] = \
                    position.values[0][0] / 100

        exposure_rows.append(exposure_add)

    if exposure_rows:
        exposure_new = pd.concat(exposure_rows, axis=0)
    else:
        exposure_new = pd.DataFrame([])

    # Merge the new rows into the existing file, if there is one.
    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new

    if len(params) > 0:
        params = params[barra_name]
        params.to_csv(out_file)
def cal_weight_date(self, date, quarter_date):
    """
    Estimate the stock weights of the benchmark fund pool at ``date``.

    The equal-weighted, normalised quarterly holdings of the
    "基金持仓基准基金池" pool are always computed.  When ``date`` is more
    than 30 trade dates away from ``quarter_date`` (as reported by
    ``Date().get_trade_date_diff``), a positive Lasso regression of
    ``self.index_return`` (divided by the pool's median stock ratio) on the
    returns of the held stocks is fitted in addition: ``LassoCV`` selects
    alpha, then a ``Lasso`` without intercept is refitted with that alpha.

    :param date: date at which the weights are wanted
    :param quarter_date: label of the quarterly report the holdings come from
    :return: DataFrame indexed by stock.  With the regression it has the
        columns ``date`` (coefficients above 0.0001) and ``Weight``; without
        it only ``Weight``.  Sorted by ``Weight`` in descending order.
    """
    days_diff = Date().get_trade_date_diff(quarter_date, date)
    fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池",
                                              date=quarter_date)

    fund_columns = []
    for fund in fund_pool:
        # Best effort: unreadable holdings become an empty column.
        try:
            fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
            fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date *= 1.0  # equal weight per fund
            fund_holding_date.columns = [fund]
        except Exception:
            fund_holding_date = pd.DataFrame([], columns=[fund])
        fund_columns.append(fund_holding_date)

    # Concatenate once instead of once per fund.  An empty pool gives an
    # empty frame instead of an unbound-variable error.
    if fund_columns:
        stock_data = pd.concat(fund_columns, axis=1)
    else:
        stock_data = pd.DataFrame()

    stock_data = stock_data.dropna(how='all')
    stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
    stock_data_weight /= stock_data_weight.sum()
    stock_data_weight.columns = ["Weight"]
    stock_data_weight = stock_data_weight.sort_values(by=['Weight'],
                                                      ascending=False)

    stock_ratio = pd.DataFrame(self.stock_ratio.loc[fund_pool, quarter_date])
    ratio = stock_ratio.median().values[0] / 100.0

    if days_diff > 30:
        # Get the stock returns and the (position-scaled) fund index return.
        stock_pool = list(stock_data_weight.index)
        beg_date = Date().get_trade_date_offset(date, -61)
        date_series = Date().get_trade_date_series(beg_date, date)
        f_pct = self.index_return / ratio
        s_pct = self.stock_return.loc[date_series, stock_pool]
        s_pct = s_pct.T.dropna(how='all').T
        s_pct = s_pct.dropna(how='all')
        f_pct = f_pct.dropna()

        # Prepare the data for the Lasso regression.
        data = pd.concat([f_pct, s_pct], axis=1)
        data = data.loc[beg_date:date, :]
        data = data.dropna(subset=['IndexReturn'])
        data = data.fillna(0.0)

        # Column vector of the target; same shape np.row_stack produced,
        # without relying on the alias deprecated in NumPy 2.0.
        y = data['IndexReturn'].values.reshape(-1, 1)
        # assumes 'IndexReturn' is the first and only column contributed by
        # f_pct, so the remaining columns are the stocks — TODO confirm
        x = data.iloc[:, 1:].values

        # LassoCV tunes alpha automatically to select the best alpha.
        model = LassoCV(fit_intercept=True, positive=True)
        model.fit(x, y)
        print(model.alpha_)
        alpha = model.alpha_

        model = Lasso(alpha=alpha, fit_intercept=False, positive=True)
        model.fit(x, y)

        selected = model.coef_ > 0.0001
        res = pd.DataFrame(model.coef_[selected],
                           index=s_pct.columns[selected],
                           columns=[date])
        res = res.sort_values(by=[date], ascending=False)
        result = pd.concat([res, stock_data_weight], axis=1)
        result = result.sort_values(by=['Weight'], ascending=False)
    else:
        result = stock_data_weight

    return result
def cal_fund_holder_exposure_quarter(self, fund, beg_date, end_date):
    """
    Compute the quarterly holding exposure of a single fund to the Barra
    STYLE / COUNTRY / INDUSTRY factors and merge it into the fund's CSV.

    NOTE(review): the original note says this is the exposure of the top
    ten holdings; that depends on what ``get_fund_stock_weight_quarter``
    returns — confirm.

    For every quarter date that has a holdings column, the exposure to a
    factor is ``sum(weight * factor_exposure) / 100`` over the stocks
    present in both tables.  The country factor is then overwritten with
    the fund's "Stock_Ratio" / 100.  Dates without usable data yield a NaN
    row.  The rows are merged into
    ``<data_path_exposure>/fund_holding_exposure_quarter/
    Fund_Holder_Exposure_Quarter_<fund>.csv``.

    :param fund: fund identifier, also used in the output file name
    :param beg_date: first date of the quarterly date series
    :param end_date: last date of the quarterly date series
    :return: None; returns early, without touching the file, when the fund
        has no holdings table or no date overlaps its columns
    """
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
    date_series = Date().get_normal_date_series(beg_date, end_date,
                                                period='Q')
    fund_holding = FundHolder().get_fund_stock_weight_quarter(fund)
    if fund_holding is None:
        return None

    # Only dates for which a holdings column exists can be computed.
    date_series = sorted(set(date_series) & set(fund_holding.columns))
    if not date_series:
        # Nothing new to merge; previously this fell through to an
        # unbound ``exposure_new``.
        return None

    # The factor names do not depend on the date, so fetch them once.
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)
        barra_exposure = Barra().get_factor_exposure_date(
            trade_date, type_list)
        print(
            "########## Calculate Quarter Holder Exposure %s %s ##########"
            % (fund, report_date))

        # NaN row by default; filled in only when both inputs overlap.
        exposure_add = pd.DataFrame([], columns=barra_name,
                                    index=[report_date])

        if barra_exposure is not None:
            fund_holding_date = pd.DataFrame(fund_holding[report_date])
            fund_holding_date = fund_holding_date.dropna()
            fund_holding_date = fund_holding_date.sort_values(
                by=[report_date], ascending=False)
            fund_holding_date.columns = ["Weight"]

            data = pd.concat([fund_holding_date, barra_exposure], axis=1)
            data = data.dropna()

            if len(data) > 0:
                for factor_name in barra_name:
                    stock_exposure = data['Weight'] * data[factor_name]
                    # .loc replaces the removed DataFrame.ix indexer.
                    exposure_add.loc[report_date, factor_name] = \
                        stock_exposure.sum() / 100.0

                # The country exposure is the fund's stock position.
                country_name = Barra().get_factor_name(
                    ["COUNTRY"])["NAME_EN"].values[0]
                position = FundFactor().get_fund_factor(
                    "Stock_Ratio", date_list=[report_date],
                    fund_pool=[fund])
                position = position.values[0][0]
                exposure_add.loc[report_date, country_name] = position / 100

        exposure_rows.append(exposure_add)

    exposure_new = pd.concat(exposure_rows, axis=0)

    # Merge the new rows into the existing file, if there is one.
    out_path = os.path.join(self.data_path_exposure,
                            'fund_holding_exposure_quarter')
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(
        out_path, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new
    params.to_csv(out_file)
def get_data(self):
    """
    Store the results of ``get_fund_holding_industry_all`` and
    ``get_fund_holding_all`` on the instance as ``industry_data`` and
    ``stock_data``.
    """
    # A fresh FundHolder is created for each call, as before.
    industry_holdings = FundHolder().get_fund_holding_industry_all()
    self.industry_data = industry_holdings

    stock_holdings = FundHolder().get_fund_holding_all()
    self.stock_data = stock_holdings
def cal_fund_holder_exposure(self, fund, beg_date, end_date):
    """
    Compute the holding exposure of a single fund to the Barra
    STYLE / COUNTRY / INDUSTRY factors, once per half year, and merge it
    into the fund's CSV.

    For every date of the half-year series, the exposure to a factor is
    ``sum(weight * factor_exposure) / 100`` over the stocks present in both
    the fund's holdings and the Barra exposure table.  Dates without usable
    data yield a NaN row.  The rows are merged into
    ``Fund_Holder_Exposure_<fund>.csv`` under the directory returned by
    ``Parameter().get_read_file(self.holder_exposure_name)``.

    :param fund: fund identifier, also used in the output file name
    :param beg_date: first date of the half-year date series
    :param end_date: last date of the half-year date series
    :return: None; returns early, without touching the file, when the date
        series is empty
    """
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
    date_series = Date().get_normal_date_series(beg_date, end_date,
                                                period='S')
    if len(date_series) == 0:
        # Nothing to compute; previously this fell through to an unbound
        # ``exposure_new``.
        return None

    # The factor names do not depend on the date, so fetch them once.
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)
        barra_exposure = Barra().get_factor_exposure_date(
            trade_date, type_list)
        fund_holding = FundHolder().get_fund_holding_report_date_fund(
            fund, report_date)
        print("########## Calculate Holder Exposure %s %s ##########" %
              (fund, report_date))

        # NaN row by default; filled in only when both inputs overlap.
        exposure_add = pd.DataFrame([], columns=barra_name,
                                    index=[report_date])

        if (barra_exposure is not None) and (fund_holding is not None):
            fund_holding = fund_holding['Weight']
            data = pd.concat([fund_holding, barra_exposure], axis=1)
            data = data.dropna()

            if len(data) > 0:
                for factor_name in barra_name:
                    stock_exposure = data['Weight'] * data[factor_name]
                    # .loc replaces the removed DataFrame.ix indexer.
                    exposure_add.loc[report_date, factor_name] = \
                        stock_exposure.sum() / 100.0

        exposure_rows.append(exposure_add)

    exposure_new = pd.concat(exposure_rows, axis=0)

    # Merge the new rows into the existing file, if there is one.
    out_path = Parameter().get_read_file(self.holder_exposure_name)
    out_file = os.path.join(out_path, 'Fund_Holder_Exposure_' + fund + '.csv')

    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new
    params.to_csv(out_file)
def get_fund_holder_data(self, quarter_date, quarter_last_date):
    """
    Build a per-stock table comparing aggregate fund holdings between two
    quarterly report dates.

    Shares are summed per ``StockCode`` over all holding records of a
    report date whose ``PublishDate`` is not later than the report date
    shifted by 20 trade dates.  Prices are the unadjusted prices at the
    trade dates matching the two report dates; ``Adjust`` is the ratio of
    the adjust factors (current / last).

    Returned columns:

    * ``ShareQuarter``, ``ShareQuarterLast`` - summed shares
    * ``PriceQuarter``, ``PriceQuarterLast`` - unadjusted prices
    * ``PriceMean`` - mean of the current price and the last price divided
      by ``Adjust``
    * ``Adjust``, ``Industry`` (last column of "industry_citic1")
    * ``ShareQuarterLastAdjust`` = ``ShareQuarterLast * Adjust``
    * ``MvQuarter``, ``MvQuarterLast`` = shares * price, divided by 1e8
    * ``Inflow`` = (``ShareQuarter`` - ``ShareQuarterLastAdjust``) *
      ``PriceMean``, divided by 1e8

    Rows without ``Adjust`` or ``Industry`` are dropped; remaining gaps are
    filled with 0.0.

    :param quarter_date: current quarterly report date
    :param quarter_last_date: previous quarterly report date
    :return: DataFrame indexed by stock code with the columns above
    """
    # Dates
    quarter_trade_date = Date().get_trade_date_offset(quarter_date, 0)
    quarter_last_trade_date = Date().get_trade_date_offset(
        quarter_last_date, 0)
    print(quarter_date, quarter_last_date)
    print(quarter_trade_date, quarter_last_trade_date)

    # Shares
    data = FundHolder().get_fund_holding_stock_all()

    def shares_by_stock(report_date):
        """Sum 'Share' per stock for one report date, published in time."""
        publish_cutoff = Date().get_trade_date_offset(report_date, 20)
        reported = data[data.ReportDate == report_date]
        reported = reported[reported.PublishDate <= publish_cutoff]
        # Select the column before summing so that unrelated (possibly
        # non-summable) columns are never aggregated.
        return pd.DataFrame(
            reported.groupby(by=['StockCode'])['Share'].sum())

    quarter_share = shares_by_stock(quarter_date)
    quarter_last_share = shares_by_stock(quarter_last_date)

    # Prices
    adjust_factor = Stock().read_factor_h5("AdjustFactor")
    price_unadjust = Stock().read_factor_h5("Price_Unadjust")
    quarter_adjust = adjust_factor[quarter_trade_date] / adjust_factor[
        quarter_last_trade_date]
    quarter_price = price_unadjust[quarter_trade_date]
    quarter_price_last = price_unadjust[quarter_last_trade_date]
    average_price = (
        price_unadjust[quarter_trade_date] +
        price_unadjust[quarter_last_trade_date] / quarter_adjust) / 2.0

    # Industry: only the most recent column is used.
    industry = Stock().read_factor_h5("industry_citic1")
    industry_date = pd.DataFrame(industry[industry.columns[-1]])
    industry_date.columns = ['Industry']

    # Combine
    result = pd.concat([
        quarter_share, quarter_last_share, quarter_price,
        quarter_price_last, average_price, quarter_adjust, industry_date
    ], axis=1)
    result.columns = [
        'ShareQuarter', 'ShareQuarterLast', 'PriceQuarter',
        'PriceQuarterLast', 'PriceMean', 'Adjust', 'Industry'
    ]
    result = result.dropna(subset=['Adjust', 'Industry'])
    result = result.fillna(0.0)

    # Derived columns
    result['ShareQuarterLastAdjust'] = \
        result['ShareQuarterLast'] * result['Adjust']
    result['MvQuarter'] = result['ShareQuarter'] * result['PriceQuarter']
    result['MvQuarterLast'] = \
        result['ShareQuarterLast'] * result['PriceQuarterLast']
    result['Inflow'] = (
        result['ShareQuarter'] -
        result['ShareQuarterLastAdjust']) * result['PriceMean']

    result['MvQuarter'] /= 100000000.0
    result['Inflow'] /= 100000000.0
    result['MvQuarterLast'] /= 100000000.0

    return result
def update_data(self):
    """
    Update the required data by running the three loaders below, in order.
    """
    # NOTE(review): "D" presumably selects the daily calendar - confirm
    # against Date.load_trade_date_series.
    calendar = Date()
    calendar.load_trade_date_series("D")

    stock_store = Stock()
    stock_store.load_h5_primary_factor()

    holder_store = FundHolder()
    holder_store.load_fund_holding_stock()