def update_data(beg_date, end_date):
    """
    Update the data this module needs.

    Calls, in order: ``Stock().load_all_stock_code_now()``,
    ``Fund().load_fund_holding_stock()``,
    ``Fund().load_fund_factor_all(beg_date, end_date)`` and
    ``Stock().load_h5_primary_factor()``.

    :param beg_date: start of the window passed to the fund-factor loader
    :param end_date: end of the window passed to the fund-factor loader
    """
    Stock().load_all_stock_code_now()
    Fund().load_fund_holding_stock()
    # Only the fund-factor load is bounded by the date window.
    Fund().load_fund_factor_all(beg_date, end_date)
    Stock().load_h5_primary_factor()
def cal_lasso_stock_wind_file(self):
    """
    Iterate over ``self.date_series`` and write one Wind portfolio CSV per
    date from the LASSO result.

    For each period end date the LASSO weights are rescaled so that they sum
    to the median stock ratio of the benchmark fund pool at the latest fund
    quarter date; the remainder is booked as a ``Cash`` row.
    """
    out_dir = os.path.join(self.wind_data_path, self.port_name)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Transposed so that rows are fund codes and columns are dates.
    ratio_by_fund = Fund().get_fund_factor('Stock_Ratio').T

    for period_end_date in self.date_series:
        period_beg_date = Date().get_trade_date_offset(
            period_end_date, -(self.lasso_date_number - 2))
        portfolio = self.cal_lasso_stock_wind_file_date(period_beg_date, period_end_date)

        # Median stock ratio (in percent) of the pool at the last quarter date.
        quarter_date = Date().get_last_fund_quarter_date(period_end_date)
        pool_codes = Fund().get_fund_pool_code(name="基金持仓基准基金池", date=quarter_date)
        pool_ratio = pd.DataFrame(ratio_by_fund.loc[pool_codes, quarter_date])
        ratio = pool_ratio.median().values[0] / 100.0
        print(period_end_date, ratio)

        # Normalise the weights to the target stock ratio, rest goes to cash.
        portfolio.columns = ['Weight']
        raw_weight = portfolio['Weight']
        portfolio['Weight'] = raw_weight / raw_weight.sum() * ratio
        portfolio.loc['Cash', 'Weight'] = 1 - ratio

        # Remaining columns of the upload file.
        portfolio.index.name = 'Code'
        portfolio["CreditTrading"] = "No"
        portfolio["Date"] = period_end_date
        portfolio["Price"] = 0.0
        portfolio["Direction"] = "Long"

        file_name = '%s_%s.csv' % (self.port_name, period_end_date)
        portfolio.to_csv(os.path.join(out_dir, file_name))
def get_input_data(self, fund_code, fund_name, fund_strategy, asset_allocation_strategy, bench_code, date,
                   fund_manager):
    """
    Store the fund description and load the data needed for the report.

    From ``date`` the latest trade date, the last fund quarter date and the
    last fund half-year date are derived; then the fund NAV, the top-10
    holdings at the quarter date and the full holdings at the half-year date
    are loaded.

    :param fund_code: fund code, used for all data look-ups
    :param fund_name: fund name (stored as given)
    :param fund_strategy: stored as given
    :param asset_allocation_strategy: stored as given
    :param bench_code: benchmark code (stored as given)
    :param date: reference date from which the report dates are derived
    :param fund_manager: stored as given
    """
    self.fund_code = fund_code
    self.fund_name = fund_name
    self.date = date
    self.fund_strategy = fund_strategy
    self.asset_allocation_strategy = asset_allocation_strategy
    # Offset 0 — presumably snaps `date` onto the trading calendar; TODO confirm.
    self.last_trade_date = Date().get_trade_date_offset(date, 0)
    self.quarter_date = Date().get_last_fund_quarter_date(date)
    self.halfyear_date = Date().get_last_fund_halfyear_date(date)
    print("参数时间", self.last_trade_date, self.quarter_date, self.halfyear_date)
    self.fund_nav = MfcData().get_mfc_public_fund_nav(self.fund_code)
    self.fund_top10_stock = Fund().get_fund_top10_stock_date(
        self.fund_code, self.quarter_date)
    # Scaled by 1/100 — presumably percent to fraction; TODO confirm units.
    self.fund_top10_stock /= 100.0
    self.fund_all_stock = Fund().get_fund_all_stock_date(
        self.fund_code, self.halfyear_date)
    self.fund_all_stock /= 100.0
    self.bench_code = bench_code
    self.fund_manager = fund_manager
def update_data(self):
    """
    Update the data this class needs.

    Refreshes fund / security information, fund and index factors for the
    window ending today and starting 20 trade-date offsets earlier, Barra
    data, and the exposures of 000300.SH and 000905.SH over that window.
    """
    today_str = Date().change_to_str(datetime.today())

    # Basic information of funds and securities.
    Fund().load_findb_fund_info()
    Fund().load_findb_sec_info()
    Fund().load_wind_fund_info()

    # Fund NAV factors and index price factors.
    window_start = Date().get_trade_date_offset(today_str, -20)
    Fund().load_fund_factor_all(window_start, today_str)
    Index().load_index_factor_all(window_start, today_str)

    # Barra data, then the index exposures over the same window length.
    Barra().load_barra_data()
    window_start = Date().get_trade_date_offset(today_str, -20)
    for index_code in ("000300.SH", "000905.SH"):
        Index().cal_index_exposure(index_code, beg_date=window_start, end_date=today_str)
def lasso_fund_pool():
    """
    Compute, for every fund and every quarter end since 2004-12-31, the
    summed weight of the fund's ten largest stock holdings and write the
    fund x date table to CSV.

    Fixes relative to the previous version:

    * ``code_list`` was read before it was assigned (UnboundLocalError); it
      is now taken from the ``Stock_Ratio`` factor table.
    * ``.ix`` (removed in pandas 1.0) is replaced by ``.iloc`` / ``.loc``.
    * ``.ix[0:10]`` was label-inclusive and summed 11 rows; the top-10 slice
      now really contains at most 10 rows.

    NOTE(review): ``path`` is not defined in this function; it has to exist
    at module level, and ``path + ''`` writes to ``path`` itself.
    """
    fund_holder = Fund().get_fund_holding_all()
    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=["20180331"], fund_pool=None)

    # Assumes the factor table is dates x fund codes (other call sites in
    # this file transpose it to get fund codes on the index) — TODO confirm.
    code_list = sorted(position_all.columns)
    date_list = sorted(Date().get_normal_date_series(
        beg_date="20041231", end_date=datetime.today(), period="Q"))

    result = pd.DataFrame([], index=code_list, columns=date_list)

    for date in date_list:
        # Filter by date once per quarter instead of once per (fund, quarter).
        holder_date = fund_holder[fund_holder.Date == date]
        for fund_code in code_list:
            holder = holder_date[holder_date.FundCode == fund_code]
            holder = holder.sort_values(by=['Weight'], ascending=False)
            # iloc is positional and end-exclusive: at most 10 rows.
            top10_weight = holder['Weight'].iloc[:10].sum()
            result.loc[fund_code, date] = top10_weight
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, top10_weight))

    result.to_csv(path + '')
def update_data(self):
    """
    Update the data needed to compute fund performance.

    Steps: public / private fund NAVs, index price factors for the window
    ending today and starting 30 trade-date offsets earlier, the synthetic
    indices built on H00905.CSI, and three fund factors since 2018-01-01.
    """
    # Public adjusted NAV download and private adjusted NAV calculation.
    MfcData().load_mfc_public_fund_nav()
    MfcData().load_mfc_fund_div()
    MfcData().cal_mfc_private_fund_nav_all()
    self.get_zz500_adjust()

    now = datetime.today()

    # Index price data.
    window_start = Date().get_trade_date_offset(now, -30)
    Index().load_index_factor_all(beg_date=window_start, end_date=now)

    # Synthetic indices: (fixed return, index ratio, name), all on the same base index.
    base_index_code = "H00905.CSI"
    synthetic_specs = (
        (0.01, 0.8, "中证500全收益指数80%+固定收益1%"),
        (0.08, 0.0, '固定收益年化8%'),
        (0.00, 0.6, "中证500全收益指数60%"),
        # NOTE(review): the name says 6.52% but the fixed return is 0.0625 — confirm which is intended.
        (0.0625, 0.0, "固定收益年化6.52%"),
        (0.00, 0.3, "中证500全收益指数30%"),
    )
    for fix_return, index_ratio, make_index_name in synthetic_specs:
        Index().make_index_with_fixed(fix_return, index_ratio, base_index_code, make_index_name)

    # Fund NAV related factors.
    for factor_name in ("Repair_Nav", "Repair_Nav_Pct", "Stock_Ratio"):
        Fund().load_fund_factor(factor_name, "20180101", now)
def weight_allstock_holding_date(report_date):
    """
    Build the asset-weighted, all-holdings stock benchmark of the benchmark
    fund pool for one report date and write it to CSV (no header).

    Every fund in the pool whose reported stock weights sum to at least 60
    contributes ``Weight * asset`` per holding, where ``asset`` is the fund's
    value in the Wind asset table divided by 1e8 (1.0 when it cannot be
    looked up). The per-stock sums are normalised to total 1.

    Fixes relative to the previous version:

    * ``weight.ix[...]`` inside a bare ``except:`` failed on pandas >= 1.0
      and silently set ``asset = 1.0`` for every fund; ``.loc`` is used and
      only look-up / arithmetic errors fall back to 1.0.
    * An empty pool (NameError) or a pool where no fund passes the threshold
      (KeyError) now raises a descriptive ``ValueError``.
    * Frames are concatenated once and stocks aggregated with ``groupby``.

    :param report_date: report date, converted with ``Date().change_to_str``
    :raises ValueError: if no fund in the pool passes the weight threshold
    """
    report_date = Date().change_to_str(report_date)

    data = Fund().get_fund_holding_report_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]
    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    weight = Fund().get_wind_fund_asset(report_date)

    kept = []
    for fund in sorted(set(pool)):
        data_fund = data[data['FundCode'] == fund].dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)

        # Skip funds that are not (nearly) fully disclosed / invested.
        if data_fund['Weight'].sum() < 60:
            continue

        try:
            asset = weight.loc[fund, report_date] / 100000000
        except (KeyError, TypeError, ValueError):
            # Best effort, as before: unknown size counts with unit weight.
            asset = 1.0

        data_fund = data_fund.copy()
        data_fund["Asset_Weight"] = data_fund['Weight'] * asset
        kept.append(data_fund)

    if not kept:
        raise ValueError(
            "no fund in the pool passes the weight threshold at %s" % report_date)

    data_fund_all = pd.concat(kept, axis=0)
    weight_sum = data_fund_all['Asset_Weight'].sum()

    # groupby sorts by stock code, matching the previous sorted index.
    stock_weight = data_fund_all.groupby('StockCode')['Asset_Weight'].sum() / weight_sum
    weight_code = stock_weight.to_frame('Asset_Weight')
    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')

    out_path = Parameter().get_read_file("Fund_Stock_Holding_BenchMark")
    out_path = os.path.join(out_path, "weight_halfyear_all")
    os.makedirs(out_path, exist_ok=True)

    out_file = os.path.join(out_path, "weight_halfyear_all_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
def get_data(self, report_date):
    """
    Fetch the inputs for one report date.

    :param report_date: report date passed to the holding and pool look-ups
    :return: ``(hold_data, fund_code)`` — the holdings restricted to the
        ``FundCode``, ``Weight`` and ``StockCode`` columns, and the sorted,
        de-duplicated fund codes of the benchmark fund pool
    """
    holdings = Fund().get_fund_holding_stock_date(report_date)
    hold_data = holdings[['FundCode', 'Weight', 'StockCode']]

    pool_codes = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    return hold_data, sorted(set(pool_codes))
def get_etf_fund_code_list():
    """
    Return the Wind codes of the ETF funds in the index-fund pool.

    The pool as of 20181231 is narrowed to rows flagged as ETF, not a
    feeder (connect) fund and not a Hong Kong fund.

    :return: list of Wind fund codes
    """
    pool = Fund().get_fund_pool_all("20181231", '指数型基金')

    # All three flags must match; combined into one mask instead of three passes.
    wanted = (
        (pool['if_etf'] == 'ETF基金')
        & (pool['if_connect'] == '非联接基金')
        & (pool['if_hk'] == '非港股基金')
    )
    return list(pool.loc[wanted, 'wind_code'].values)
def cal_fund_index(self, fund_pool_name, my_index_code, my_fund_code, beg_date, end_date):
    """
    Compute performance figures for a fund and its peer pool and save them.

    For every fund in the pool (funds set up on or after ``beg_date`` are
    dropped) plus ``my_fund_code``, the fund return, benchmark return,
    excess return, tracking error and information ratio over
    ``[beg_date, end_date]`` are computed. ``my_fund_code`` is measured
    against ``my_index_code``, every other fund against 881001.WI. Funds
    whose calculation fails are printed and left out. The ranked table is
    written to ``<data_path>/data/<pool>_<fund>_<beg>_<end>.csv``.
    """
    pool_info = Fund().get_fund_pool_all(date="20181231", name=fund_pool_name)
    pool_info = pool_info[pool_info['setupdate'] < beg_date]
    fund_codes = list(pool_info['wind_code'].values)
    fund_codes.append(my_fund_code)

    result = pd.DataFrame([], index=fund_codes)
    nav_data = Fund().get_fund_factor("Repair_Nav")

    for fund_code in fund_codes:
        index_code = my_index_code if fund_code == my_fund_code else "881001.WI"
        print(fund_code, index_code, beg_date, end_date)

        try:
            fund_nav = pd.DataFrame(nav_data[fund_code])
            index_close = Index().get_index_factor(index_code, attr=["CLOSE"])
            fs = FinancialSeries(pd.DataFrame(fund_nav), pd.DataFrame(index_close))

            fund_return = fs.get_interval_return_annual(beg_date, end_date)
            bench_return = fs.get_interval_return_benchmark(beg_date, end_date)
            excess_return = fs.get_interval_excess_return(beg_date, end_date)
            tracking_error = fs.get_interval_tracking_error(beg_date, end_date)
            ir = excess_return / tracking_error

            result.loc[fund_code, "基准收益"] = bench_return
            result.loc[fund_code, "基金收益"] = fund_return
            # The stored excess return is fund minus benchmark, not `excess_return`.
            result.loc[fund_code, "超额收益"] = - bench_return + fund_return
            result.loc[fund_code, "跟踪误差"] = tracking_error
            result.loc[fund_code, "信息比率"] = ir
        except Exception as e:
            print(e)

    # Keep complete, unique rows and rank them by fund return.
    result = result.dropna()
    result = result[~result.index.duplicated()]
    result = result.sort_values(by=['基金收益'], ascending=False)
    result['收益名次'] = range(1, len(result) + 1)
    total = len(result)
    result['收益排名'] = result['收益名次'].map(lambda rank: str(rank) + '/' + str(total))

    file_name = "%s_%s_%s_%s.csv" % (fund_pool_name, my_fund_code, beg_date, end_date)
    result.to_csv(os.path.join(self.data_path, 'data', file_name))
def equal_top10stock_holding_date(report_date):
    """
    Build the equal-fund-weighted top-10-holdings stock benchmark of the
    benchmark fund pool for one report date and write it to CSV (no header).

    For every fund in the pool the ten largest holdings are taken; funds
    whose top-10 weights sum to less than 30 are skipped. Per-stock weight
    sums are normalised to total 1.

    Fixes relative to the previous version:

    * ``.ix`` (removed in pandas 1.0) is no longer used.
    * An empty pool raised NameError; together with the case where no fund
      passes the threshold it now raises a descriptive ``ValueError``.
    * Frames are concatenated once and stocks aggregated with ``groupby``.

    :param report_date: report date, converted with ``Date().change_to_str``
    :raises ValueError: if no fund in the pool passes the weight threshold
    """
    report_date = Date().change_to_str(report_date)

    data = Fund().get_fund_holding_report_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]
    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")

    kept = []
    for fund in sorted(set(pool)):
        data_fund = data[data['FundCode'] == fund].dropna(subset=['Weight'])
        top10 = data_fund.sort_values(by=['Weight'], ascending=False).iloc[:10, :]
        if top10['Weight'].sum() < 30:
            continue
        kept.append(top10)

    if not kept:
        raise ValueError(
            "no fund in the pool passes the weight threshold at %s" % report_date)

    data_fund_top10 = pd.concat(kept, axis=0)
    weight_sum = data_fund_top10['Weight'].sum()

    # groupby sorts by stock code, matching the previous sorted index.
    stock_weight = data_fund_top10.groupby('StockCode')['Weight'].sum() / weight_sum
    weight_code = stock_weight.to_frame('Weight')
    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')

    out_path = Parameter().get_read_file("Fund_Stock_Holding_BenchMark")
    out_path = os.path.join(out_path, "equal_quarter_top10")
    os.makedirs(out_path, exist_ok=True)

    out_file = os.path.join(out_path, "equal_quarter_top10_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
def __init__(self):
    """
    Initialise the base ``Data`` class, set the data paths and preload the
    stock / fund / index return tables and the fund stock-ratio table.
    """
    Data.__init__(self)
    # Sub-directory joined onto the primary data path below.
    self.sub_data_path = r'4_fund_data\stock_predict'
    self.data_path_exposure = os.path.join(self.primary_data_path, self.sub_data_path)
    self.wind_port_path = WindPortUpLoad().path
    # Stock returns are transposed after reading; fund returns are used as returned.
    self.stock_pct = Stock().read_factor_h5("Pct_chg").T
    self.fund_pct = Fund().get_fund_factor("Repair_Nav_Pct")
    # PCT of 885062.WI times 100 — presumably to match percent units of the
    # other return tables; TODO confirm.
    self.bold_pct = Index().get_index_factor("885062.WI", attr=['PCT']) * 100
    self.bold_pct.columns = ['885062.WI']
    self.stock_ratio = Fund().get_fund_factor("Stock_Ratio")
    # NOTE(review): presumably the regression window length and the minimum
    # number of observations required; confirm against the regression code.
    self.regression_len = 60
    self.regression_min_len = 12
def equal_allstock_halfyear_date(report_date):
    """
    Build the equal-fund-weighted all-holdings stock benchmark of the
    benchmark fund pool for one report date and write it to CSV (no header).

    Every fund in the pool whose reported stock weights sum to at least 60
    contributes its holdings; per-stock weight sums are normalised to
    total 1.

    Fixes relative to the previous version:

    * ``.ix`` (removed in pandas 1.0) is no longer used.
    * An empty pool raised NameError; together with the case where no fund
      passes the threshold it now raises a descriptive ``ValueError``.
    * Frames are concatenated once and stocks aggregated with ``groupby``.

    :param report_date: report date, converted with ``Date().change_to_str``
    :raises ValueError: if no fund in the pool passes the weight threshold
    """
    report_date = Date().change_to_str(report_date)

    data = Fund().get_fund_holding_stock_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]
    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")

    kept = []
    for fund in sorted(set(pool)):
        data_fund = data[data['FundCode'] == fund].dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)
        if data_fund['Weight'].sum() < 60:
            continue
        kept.append(data_fund)

    if not kept:
        raise ValueError(
            "no fund in the pool passes the weight threshold at %s" % report_date)

    data_fund_all = pd.concat(kept, axis=0)
    weight_sum = data_fund_all['Weight'].sum()

    # groupby sorts by stock code, matching the previous sorted index.
    stock_weight = data_fund_all.groupby('StockCode')['Weight'].sum() / weight_sum
    weight_code = stock_weight.to_frame('Weight')
    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')

    out_path = os.path.join(Fund().data_path_holder, "fund_holding_benchmark")
    out_path = os.path.join(out_path, "equal_halfyear_all")
    os.makedirs(out_path, exist_ok=True)

    out_file = os.path.join(out_path, "equal_halfyear_all_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
def load_data():
    """
    Update data.

    Runs the weekly refresh in a fixed order: trade-date series, stock h5
    factors, Barra data, fund data, index factors and static stock data,
    then issues the shell command ``pause``.
    """
    print(" 更新本周数据 ")

    # Parameters
    today = datetime.today().strftime("%Y%m%d")

    # Update the date series (already updated in the morning)
    Date().load_trade_date_series_all()

    # Stock factor data (h5 data downloaded from the network drive)
    Stock().load_h5_primary_factor()

    # Update Barra data
    beg_date = Date().get_trade_date_offset(today, -5)
    Barra().update_barra(beg_date, today)

    # Update Fund (basic data, factor data, holding data)
    beg_date = Date().get_trade_date_offset(today, -90)
    Fund().update_fund_data(beg_date, today)

    # Update Index (IndexWeight is updated daily, so it is not updated here)
    # Consumes Wind data quota
    beg_date = Date().get_trade_date_offset(today, -5)
    Index().load_index_factor_all(beg_date, today)

    # Static stock data, e.g. stock pools, listing dates, etc.
    # Consumes Wind data quota
    beg_date = Date().get_trade_date_offset(today, -5)
    Stock().load_stock_static_data_all(beg_date, today)

    # NOTE(review): "pause" is a Windows shell command; on other platforms
    # this call has no useful effect.
    os.system("pause")
def FundBarraDecomposeReturnQuarter(fund_holding_date, report_date): """ 计算在给定时间点前后一个月 基金 拆分的 特异收益 风格收益 行业收益 和 市场收益 """ # params ################################################################################### report_date = '20171231' fund_code = '000001.OF' path = 'E:\\3_Data\\4_fund_data\\7_fund_select_stock\\' fund_holding_all = Fund().get_fund_holding_all() fund_holding_date = fund_holding_all[fund_holding_all['Date'] == report_date] fund_holding = fund_holding_date[fund_holding_date['FundCode'] == fund_code] fund_holding.index = fund_holding['StockCode'] fund_holding = fund_holding.ix[~fund_holding.index.duplicated(), :] fund_holding = fund_holding.dropna(subset=['Weight']) fund_holding = fund_holding.sort_values(by=['Weight'], ascending=False) file = os.path.join( path, "StockBarraDecomposeReturnQuarter", "StockBarraDecomposeReturnQuarter" + report_date + '.csv') stock_decompose_return = pd.read_csv(file, index_col=[0], encoding='gbk') stock_decompose_return = stock_decompose_return.ix[fund_holding.index, :] weight_mat = np.transpose( np.tile(fund_holding['Weight'].values, (len(stock_decompose_return.columns), 1))) weight_pd = pd.DataFrame(weight_mat, index=fund_holding.index, columns=stock_decompose_return.columns) #################################################################################### return result
def set_info(self, port_name, benchmark_code):
    """
    Store the portfolio name and benchmark code, make sure the portfolio's
    output directory exists, and load the daily return tables of stocks,
    funds and indices.

    Stock returns are derived from adjusted prices (percentage change times
    100); index ``PCT`` is likewise multiplied by 100. The three tables are
    also stacked into ``self.asset_return``.

    :param port_name: portfolio name, also used as sub-directory name
    :param benchmark_code: benchmark index code
    """
    self.port_name = port_name
    self.benchmark_code = benchmark_code

    port_dir = os.path.join(self.save_path, self.port_name)
    if not os.path.exists(port_dir):
        os.makedirs(port_dir)

    # Stock returns come from adjusted prices rather than the "Pct_chg" factor.
    adjusted_price = Stock().read_factor_h5("Price_Adjust")
    self.stock_return = adjusted_price.T.pct_change(fill_method=None).T * 100

    self.index_return = Index().get_index_cross_factor("PCT").T * 100
    self.fund_return = Fund().get_fund_factor("Repair_Nav_Pct").T

    frames = [self.stock_return, self.fund_return, self.index_return]
    self.asset_return = pd.concat(frames, axis=0)
def stock_ratio_10(beg_date, end_date):
    """
    Compute the ``Stock_Ratio_10`` factor — the summed weight of each fund's
    ten largest stock holdings per quarter end — and merge it into the
    factor file.

    Only funds whose ``Stock_Ratio`` at the last fund quarter date before
    ``end_date`` exceeds 65 are included.

    Fixes relative to the previous version:

    * ``.ix`` (removed in pandas 1.0) is replaced by ``.iloc`` / ``.loc``.
    * ``.ix[0:10]`` was label-inclusive and summed 11 rows; the slice now
      contains at most 10 rows, so newly written values may be slightly
      lower than rows already stored in the factor file.
    * The holdings table is filtered by date once per quarter.

    :param beg_date: first date of the quarterly date series
    :param end_date: last date of the quarterly date series
    """
    factor_name = "Stock_Ratio_10"

    fund_holder = Fund().get_fund_holding_all()
    quarter_date = Date().get_last_fund_quarter_date(end_date)

    # Transposed so that fund codes are on the index.
    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=[quarter_date], fund_pool=None).T
    position_all.columns = ['Stock_Weight']
    position_all = position_all[position_all['Stock_Weight'] > 65]

    code_list = sorted(position_all.index)
    date_list = sorted(Date().get_normal_date_series(beg_date=beg_date, end_date=end_date, period="Q"))

    new_data = pd.DataFrame([], index=code_list, columns=date_list)

    for date in date_list:
        holder_date = fund_holder[fund_holder.Date == date]
        for fund_code in code_list:
            holder = holder_date[holder_date.FundCode == fund_code]
            holder = holder.sort_values(by=['Weight'], ascending=False)
            # iloc is positional and end-exclusive: at most 10 rows.
            top10_weight = holder['Weight'].iloc[:10].sum()
            new_data.loc[fund_code, date] = top10_weight
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, top10_weight))

    out_file = Parameter().get_read_file(factor_name)

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        data = pandas_add_row(data, new_data)
    else:
        print(" File No Exist ", factor_name)
        data = new_data
    data.to_csv(out_file)
def ols_weight_fund_pool(self, pool_name, pool_date, port_name, beg_date, end_date, period='M', stock_weight=None):
    """
    Equal-weight the OLS stock weights of every fund in a pool and write the
    result as Wind portfolio files (one CSV per date), including a cash row.

    :param pool_name: fund pool name
    :param pool_date: date of the fund pool
    :param port_name: portfolio name, used for the sub-directory and files
    :param beg_date: first date of the trade-date series
    :param end_date: last date of the trade-date series
    :param period: frequency passed to the trade-date series
    :param stock_weight: if given, stock weights are rescaled to sum to it
    """
    fund_pool = Fund().get_fund_pool_code(pool_date, pool_name)
    date_series = Date().get_trade_date_series(beg_date, end_date, period)

    for date in date_series:
        # One column of predicted weights per fund.
        weight_date = pd.DataFrame([])
        for fund_code in fund_pool:
            print(date, fund_code)
            fund_weight = FundOLSStockWeight().get_ols_stock_weight_date(
                fund_code, date)
            weight_date = pd.concat([weight_date, fund_weight.dropna()], axis=1)

        if len(weight_date) == 0:
            continue

        # Equal-weighted average across funds; only positive weights are kept.
        fund_count = len(weight_date.columns)
        result = pd.DataFrame(weight_date.sum(axis=1) / fund_count)
        result.columns = ['Weight']
        result = result[result['Weight'] > 0.0]
        result = result.sort_values(by=["Weight"], ascending=False)

        if stock_weight is not None:
            result['Weight'] = (result['Weight'] / result['Weight'].sum()) * stock_weight

        # Whatever is not in stocks is booked as cash.
        stock_sum = result['Weight'].sum()
        result.loc['Cash', 'Weight'] = 1 - stock_sum

        result['Code'] = result.index
        result['Date'] = date
        result['Price'] = "0.0"
        result['Direction'] = "Long"
        result['CreditTrading'] = 'No'

        port_dir = os.path.join(self.wind_port_path, port_name)
        if not os.path.exists(port_dir):
            os.makedirs(port_dir)

        out_file = os.path.join(self.wind_port_path, port_name, port_name + '_' + date + '.csv')
        result.to_csv(out_file)
def get_data(self, beg_date, end_date, period='M', fund_pool_name="优质基金池", pool_date='20180630'):
    """
    Load the backtest dates, the backtest fund pool and the input tables.

    :param beg_date: first backtest date
    :param end_date: last backtest date
    :param period: frequency of the trade-date series
    :param fund_pool_name: name of the fund pool
    :param pool_date: date of the fund pool
    """
    # Backtest calendar and fund universe.
    self.date_series = Date().get_trade_date_series(beg_date, end_date, period=period)
    self.fund_pool = Fund().get_fund_pool_code(pool_date, fund_pool_name)

    # Inputs: stock market value, fund adjusted-NAV returns, stock returns.
    self.stock_mv = Stock().read_factor_h5("Mkt_freeshares")
    self.fund_return = Fund().get_fund_factor("Repair_Nav_Pct")
    self.stock_return = Stock().read_factor_h5("Pct_chg").T
def cal_exposure(beg_date, end_date, out_path):
    """
    Compute exposures for the two benchmark indices and for every fund in
    the final filtered fund pools of 沪深300 and 中证500.

    :param beg_date: start of the exposure window
    :param end_date: end of the exposure window
    :param out_path: directory containing ``filter_fund_pool`` with the
        final fund pool Excel files
    """
    # Exposure of CSI 300 and CSI 500.
    for index_code in ("000300.SH", "000905.SH"):
        Index().cal_index_exposure_period(index_code, beg_date=beg_date, end_date=end_date)

    # Exposure of the funds tracking each index, one pool after the other.
    for index_name in ('沪深300', '中证500'):
        file = os.path.join(out_path, 'filter_fund_pool\\', '基金最终筛选池_' + index_name + '.xlsx')
        fund_code = pd.read_excel(file, index_col=[1], encoding='gbk')

        for fund in list(fund_code.index):
            print(fund)
            Fund().cal_fund_holder_exposure(fund=fund, beg_date=beg_date, end_date=end_date)
def get_fund_pool(self, end_date):
    """
    Return the funds of the pool that were set up early enough.

    A fund qualifies when its ``SetupDate`` is not later than ``end_date``
    shifted back by ``self.setup_date_len`` trade-date offsets. Rows with
    any missing field are dropped.

    :param end_date: reference date
    :return: DataFrame with columns SecName, SetupDate, InvestType, Corp
    """
    print("Project Nice Stock Fund Getting Fund Pool at %s ......" % end_date)

    pool = Fund().get_fund_pool_all(name=self.fund_pool_name, date="20181231")
    info = Fund().get_wind_fund_info()

    # Align the pool with the Wind info table on the fund code.
    pool.index = pool['wind_code']
    merged = pd.concat([pool, info], axis=1)

    cutoff = Date().get_trade_date_offset(end_date, -self.setup_date_len)
    old_enough = merged[merged['SetupDate'] <= cutoff]

    selected = old_enough[['sec_name', 'SetupDate', 'InvestType', 'Corp']]
    selected = selected.rename(columns={'sec_name': 'SecName'})
    return selected.dropna()
def cal_lasso_stock_pool_all():
    """
    Run the LASSO stock-pool calculation for every fund of the pool and
    every month end since 2004-12-31, and write one CSV per fund holding
    the results of all dates side by side.
    """
    lasso_period = 60
    lasso_period_min = 40
    beg_date = "20041231"
    end_date = datetime.today()
    out_path = "E:\\3_数据\\4_fund_data\\4_fund_holding_predict\\lasso_stock_pool\\"

    # The looked-up quarter date is immediately replaced by a fixed pool date.
    quarter_date = Date().get_last_fund_quarter_date(end_date)
    quarter_date = '20180630'

    date_series = Date().get_trade_date_series(beg_date, end_date, period="M")
    fund_pool = Fund().get_fund_pool_code(quarter_date, "优质基金池")

    s_marketvalue = Stock().get_h5_primary_factor("Mkt_freeshares", date_list=None)
    f_pct = Fund().get_fund_factor("Repair_Nav_Pct", date_list=None, fund_pool=fund_pool)
    s_pct = Stock().get_h5_primary_factor("Pct_chg", date_list=None, stock_pool=None).T

    for fund_code in fund_pool:
        for position, period_end_date in enumerate(date_series):
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -lasso_period)
            res_add = cal_lasso_stock_pool(s_marketvalue, f_pct, s_pct,
                                           period_beg_date, period_end_date,
                                           fund_code, out_path, lasso_period_min)
            # First date starts the table, later dates are appended as columns.
            res = res_add if position == 0 else pd.concat([res, res_add], axis=1)

        print(res)
        res.to_csv(out_path + 'LASSO回归股票池_' + fund_code + "_AllDate.csv")
def regress_fund_pool(self, name="基金持仓基准基金池", beg_date="20040301", end_date=None):
    """
    Run ``self.regress_fund`` for every fund of a fund pool.

    Fixes relative to the previous version:

    * the loop started at index 1 and silently skipped the first fund;
    * the ``end_date`` default was computed once at definition time, so a
      long-running process kept using a stale date. It is now resolved on
      each call; passing an explicit date works as before.

    :param name: fund pool name
    :param beg_date: regression start date (``YYYYMMDD``)
    :param end_date: regression end date (``YYYYMMDD``); today if ``None``
    """
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    for code in Fund().get_fund_pool_code(name=name):
        self.regress_fund(code, beg_date, end_date)
def get_fund_manager_info(self, beg_date, end_date):
    """
    Collect, for every fund of the pool, the fund manager at ``end_date``
    and the manager-change information between ``beg_date`` and
    ``end_date`` (slow to run; currently unused).

    :param beg_date: start of the manager-change window
    :param end_date: reference date and end of the window
    :return: DataFrame indexed by fund code with columns ``FundManager``
        and ``FundManagerChange``
    """
    print("Project Nice Stock Fund Getting FundManager at %s ......" % end_date)

    fund_pool = self.get_fund_pool(end_date)
    fund_factor = pd.DataFrame(
        [], index=fund_pool.index, columns=['FundManager', 'FundManagerChange'])

    for fund_code in fund_pool.index:
        print("FundManager", fund_code, end_date)
        current_manager = Fund().get_fund_manager(end_date, fund_code)
        change_info = Fund().get_fund_manager_change_info(beg_date, end_date, fund_code)
        fund_factor.loc[fund_code, "FundManager"] = current_manager
        fund_factor.loc[fund_code, 'FundManagerChange'] = change_info

    return fund_factor
def cal_ols_stock_weight_all():
    """
    Run the OLS stock-weight calculation for every fund of the pool and
    every month end since 2004-12-31, and write one CSV per fund holding
    the predicted weights of all dates side by side.
    """
    lasso_period = 60
    beg_date = "20041231"
    end_date = datetime.today()
    out_path = "E:\\3_数据\\4_fund_data\\4_fund_holding_predict\\ols_stock_weight\\"

    # The looked-up quarter date is immediately replaced by a fixed pool date.
    quarter_date = Date().get_last_fund_quarter_date(end_date)
    quarter_date = '20180630'

    date_series = Date().get_trade_date_series(beg_date, end_date, period="M")
    fund_pool = Fund().get_fund_pool_code(quarter_date, "优质基金池")

    f_pct = Fund().get_fund_factor("Repair_Nav_Pct", date_list=None, fund_pool=fund_pool)
    s_pct = Stock().get_h5_primary_factor("Pct_chg", date_list=None, stock_pool=None).T
    last_fund_holding = Fund().get_fund_holding_all()
    position_all = Fund().get_fund_factor("Stock_Ratio", None, None)

    for fund_code in fund_pool:
        for position, period_end_date in enumerate(date_series):
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -lasso_period)
            res_add = cal_ols_stock_weight(f_pct, s_pct, last_fund_holding, position_all,
                                           period_beg_date, period_end_date, fund_code, out_path)
            # First date starts the table, later dates are appended as columns.
            res = res_add if position == 0 else pd.concat([res, res_add], axis=1)

        res.to_csv(out_path + '最后预测持仓权重_' + fund_code + "_AllDate.csv")
        print(res)
def update_data(self):
    """
    Update fund holdings, public fund NAVs and the price factors of two
    indices (each from its own fixed start date up to now).
    """
    Fund().update_fund_holding()
    MfcData().load_mfc_public_fund_nav()

    # (index code, fixed start date) pairs; the end date is "now" for each load.
    index_windows = (
        ('H00985.CSI', "20040101"),
        ("885001.WI", "20180101"),
    )
    for index_code, beg_date in index_windows:
        Index().load_index_factor(index_code=index_code, beg_date=beg_date,
                                  end_date=datetime.today())
def get_pct_data(self):
    """
    Return the combined return table of funds and indices.

    Index ``PCT`` is multiplied by 100 before being stacked under the fund
    returns; the stacked table is transposed back and divided by 100.

    :return: DataFrame of returns with fund columns followed by index columns
    """
    index_pct = Index().get_index_cross_factor("PCT").T * 100
    fund_pct = Fund().get_fund_factor("Repair_Nav_Pct").T

    combined = pd.concat([fund_pct, index_pct], axis=0).T
    combined /= 100.0
    return combined
def cal_factor_mrar_all(self, T, r, beg_date, end_date):
    """
    Compute the MRAR factor for every fund of the combined fund pools at
    every quarter end between ``beg_date`` and ``end_date`` and write the
    fund x date table to ``MorningStar_MRAR_<r>_<T>.csv``.

    The 1-year government bond yield (macro series S0059744) is averaged per
    calendar month, re-indexed to the month-end date and divided by 12.

    Fixes relative to the previous version:

    * ``cal_factor_mrar`` received the fixed ``end_date`` on every iteration,
      so all rows of a fund were identical; it now receives the
      ``report_date`` being filled. NOTE(review): assumes the fourth
      argument of ``cal_factor_mrar`` is the evaluation date — confirm.
    * The pool lists returned by ``FundPool`` are no longer mutated.
    * Failures are printed instead of silently swallowed; the cell is still
      set to NaN.

    :param T: passed through to ``cal_factor_mrar`` and used in the file name
    :param r: passed through to ``cal_factor_mrar`` and used in the file name
    :param beg_date: first date of the quarterly date series
    :param end_date: last date of the quarterly date series
    """
    date_series = Date().get_normal_date_series(beg_date, end_date, "Q")
    result = pd.DataFrame([], index=date_series)

    def fun_date(x):
        # Map a "YYYYMM" label to the last calendar day of that month.
        year = int(x[0:4])
        month = int(x[4:6])
        day = calendar.monthrange(year, month)[1]
        return datetime(year, month, day).strftime("%Y%m%d")

    macro_code = "S0059744"
    macro_name = "中债国债到期收益率-1年"
    macro_data = Macro().get_macro_data(macro_code, None, None)
    macro_data.columns = [macro_name]
    macro_data['YearMonth'] = macro_data.index.map(lambda x: x[0:6])
    macro_data = macro_data.groupby(by=['YearMonth']).mean()[macro_name]
    macro_data.index = macro_data.index.map(fun_date)
    macro_data = pd.DataFrame(macro_data)
    macro_data.columns = [macro_name]
    macro_data /= 12.0

    fund_data = Fund().get_fund_factor("Repair_Nav_Pct", None, None)

    # This pool uses a fixed date, so it does not depend on the loop.
    fixed_pool = FundPool().get_fund_pool_code(date="20180630", name="东方红基金")

    for report_date in date_series:
        base_pool = FundPool().get_fund_pool_code(date=report_date, name="基金持仓基准基金池")
        quant_pool = FundPool().get_fund_pool_code(date=report_date, name="量化基金")
        fund_code_list = sorted(set(base_pool) | set(fixed_pool) | set(quant_pool))

        for fund in fund_code_list:
            print(report_date, fund)
            try:
                res = self.cal_factor_mrar(fund, T, r, report_date, fund_data, macro_data)
            except Exception as e:
                # Best effort: one failing fund must not stop the whole run.
                print(e)
                res = np.nan
            result.loc[report_date, fund] = res

    result = result.T
    file = os.path.join(self.path, "MorningStar_MRAR_" + str(r) + "_" + str(T) + '.csv')
    result.to_csv(file)
def GetAllFundAllDateFactorAlphaFile(in_path, out_path, factor_name_list, date_series):
    """
    For every factor, compute the stock-selection alpha of all funds at all
    report dates and merge it into ``FundSelectStockAlpha_<factor>.csv``.

    :param in_path: directory of the stock alpha files
    :param out_path: output directory (created if missing)
    :param factor_name_list: factor names to process
    :param date_series: report dates to process
    """
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # Read data.
    fund_holding_all = Fund().get_fund_holding_all()

    # Calculate alpha factor by factor.
    for factor_name in factor_name_list:
        stock_alpha = GetStockAlphaAtFactorFile(in_path, factor_name)

        for position, report_date in enumerate(date_series):
            fund_holding_date = fund_holding_all[fund_holding_all['Date'] == report_date]
            alpha_date = GetAllFundAlphaOnFactorFile(stock_alpha, fund_holding_date,
                                                     factor_name, report_date)
            # First date starts the table, later dates are appended as columns.
            new_data = alpha_date if position == 0 else pd.concat([new_data, alpha_date], axis=1)

        # Dates on the rows; dates without any value are dropped.
        new_data = new_data.T.dropna(how="all")

        filename = os.path.join(out_path, 'FundSelectStockAlpha_' + factor_name + '.csv')
        if os.path.exists(filename):
            old_data = pd.read_csv(filename, index_col=[0], encoding='gbk')
            old_data.index = old_data.index.map(str)
            result = pandas_add_row(old_data, new_data)
        else:
            result = new_data
        result.to_csv(filename)
def update_data(self):
    """
    Update fund holdings, public fund NAVs and the price factors of four
    indices for the window ending today and starting 40 trade-date offsets
    earlier.
    """
    Fund().update_fund_holding()
    MfcData().load_mfc_public_fund_nav()

    window_end = datetime.today().strftime("%Y%m%d")
    window_start = Date().get_trade_date_offset(window_end, -40)

    for index_code in ('H00985.CSI', "885001.WI", "000300.SH", "000905.SH"):
        Index().load_index_factor(index_code=index_code,
                                  beg_date=window_start, end_date=window_end)