def load_fund_factor(self, factor_name, beg_date, end_date):
    """Refresh the stored CSV of one fund factor with data pulled from FinDb.

    The raw records for ``factor_name`` between ``beg_date`` and ``end_date``
    are inner-joined with the static fund table on the "证券内码" column,
    pivoted into a table with one row per "日期" value and one column per
    "基金代码" value, and merged into the factor's CSV file through
    ``pandas_add_row``.  When the file does not exist yet the new table is
    written on its own.

    Args:
        factor_name: Factor identifier; also used to look up the value column
            name and the output file through ``Parameter``.
        beg_date: Start of the period; normalised with ``Date().change_to_str``.
        end_date: End of the period; normalised with ``Date().change_to_str``.

    Returns:
        None.  The result is written to the file given by
        ``Parameter().get_read_file(factor_name)``.
    """
    start = Date().change_to_str(beg_date)
    stop = Date().change_to_str(end_date)

    raw = FinDb().load_raw_data_filter_period(factor_name, start, stop)
    fund_info = FundStatic().get_fund_info()
    value_column = Parameter().get_load_findb_val_name(factor_name)

    merged = pd.merge(raw, fund_info, on="证券内码", how='inner')

    # Two-level row key: (基金代码, 日期).
    row_keys = [list(merged['基金代码'].values), list(merged['日期'].values)]
    long_table = pd.DataFrame(merged[value_column].values, index=row_keys)
    long_table = long_table.sort_index()
    # Keep only the first record for every repeated (code, date) key.
    long_table = long_table[~long_table.index.duplicated()]

    # Move the second key level into the columns, then flip so that the
    # second key level labels the rows and the first labels the columns.
    wide_table = long_table.unstack()
    wide_table.columns = wide_table.columns.droplevel(level=0)
    wide_table = wide_table.T.dropna(how='all')
    wide_table.index = wide_table.index.map(str)

    out_file = Parameter().get_read_file(factor_name)

    if not os.path.exists(out_file):
        print(" File No Exist ", factor_name)
        combined = wide_table
    else:
        stored = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        stored.index = stored.index.map(str)
        combined = pandas_add_row(stored, wide_table)

    combined.dropna(how='all').to_csv(out_file)
def GetAllFundAllDateFactorAlphaFile(in_path, out_path, factor_name_list, date_series):
    """Build and persist fund-level alpha tables, one CSV file per factor.

    For every factor in ``factor_name_list`` the stock alpha is loaded with
    ``GetStockAlphaAtFactorFile``.  For every date in ``date_series`` the
    holdings whose ``Date`` equals that date are passed to
    ``GetAllFundAlphaOnFactorFile``.  The per-date results are joined
    column-wise, transposed, rows that are entirely NaN are dropped, and the
    table is written to ``<out_path>/FundSelectStockAlpha_<factor>.csv``.
    An existing file is merged with the new rows through ``pandas_add_row``.

    Args:
        in_path: Directory handed to ``GetStockAlphaAtFactorFile``.
        out_path: Output directory; created when missing.
        factor_name_list: Factor names to process.
        date_series: Report dates to process.  When empty, nothing is
            computed or written (previously this raised UnboundLocalError).

    Returns:
        None.
    """
    # Example arguments left by the original author:
    #   in_path = 'E:\\3_Data\\4_fund_data\\7_fund_select_stock\\StockAlpha\\'
    #   out_path = 'E:\\3_Data\\4_fund_data\\7_fund_select_stock\\FundSelectStockAlpha\\'
    #   factor_name_list = ["TotalMarketValue", "BP", "IncomeYOYDaily", "ROETTMDaily", "Industry"]
    #   beg_date = "20170530"
    #   end_date = "20180630"
    #   date_series = Date().get_normal_date_series(beg_date, end_date, "S")

    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # Nothing to compute without dates.
    if len(date_series) == 0:
        return

    # Read data
    fund_holding_all = Fund().get_fund_holding_all()

    # Calculate alpha
    for factor_name in factor_name_list:
        stock_alpha = GetStockAlphaAtFactorFile(in_path, factor_name)

        # Collect the per-date results and join them once; concatenating
        # inside the loop copies the growing frame on every iteration.
        alpha_by_date = []
        for report_date in date_series:
            fund_holding_date = fund_holding_all[fund_holding_all['Date'] == report_date]
            alpha_by_date.append(
                GetAllFundAlphaOnFactorFile(stock_alpha, fund_holding_date,
                                            factor_name, report_date))

        new_data = pd.concat(alpha_by_date, axis=1)
        new_data = new_data.T.dropna(how="all")

        filename = os.path.join(out_path, 'FundSelectStockAlpha_' + factor_name + '.csv')
        if os.path.exists(filename):
            old_data = pd.read_csv(filename, index_col=[0], encoding='gbk')
            old_data.index = old_data.index.map(str)
            result = pandas_add_row(old_data, new_data)
        else:
            result = new_data
        result.to_csv(filename)
def stock_ratio_10(beg_date, end_date):
    """Compute the summed weight of each fund's ten largest holdings per quarter.

    The fund universe is every fund whose "Stock_Ratio" factor at the last
    fund quarter date before ``end_date`` is above 65.  For each of those
    funds and each quarterly date between ``beg_date`` and ``end_date`` the
    holdings are sorted by ``Weight`` (descending) and the weights of the
    first ten are summed.  The fund-by-date table is merged into the
    "Stock_Ratio_10" factor file through ``pandas_add_row``, or written as a
    new file when none exists.

    Fixes relative to the previous version:
      * ``.ix`` (removed in pandas 1.0) is replaced by ``.loc`` / ``.iloc``.
      * ``holder.ix[0:10, :]`` on a freshly reset integer index was a
        label-based, inclusive slice and therefore summed eleven rows;
        ``.iloc[:10]`` takes exactly ten.

    Args:
        beg_date: First date of the quarterly date series.
        end_date: Last date of the quarterly date series; also used to pick
            the quarter that defines the fund universe.

    Returns:
        None.  The result is written to
        ``Parameter().get_read_file("Stock_Ratio_10")``.
    """
    factor_name = "Stock_Ratio_10"
    fund_holder = Fund().get_fund_holding_all()

    quarter_date = Date().get_last_fund_quarter_date(end_date)
    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=[quarter_date], fund_pool=None).T
    position_all.columns = ['Stock_Weight']
    position_all = position_all[position_all['Stock_Weight'] > 65]

    code_list = sorted(position_all.index)
    date_list = sorted(Date().get_normal_date_series(beg_date=beg_date, end_date=end_date, period="Q"))

    new_data = pd.DataFrame([], index=code_list, columns=date_list)

    for date in date_list:
        # The date filter does not depend on the fund, so apply it once per date.
        holder_on_date = fund_holder[fund_holder.Date == date]

        for fund_code in code_list:
            holder = holder_on_date[holder_on_date.FundCode == fund_code]
            holder = holder.sort_values(by=['Weight'], ascending=False)

            # Positional slice: at most the ten largest weights.  Funds with
            # fewer than ten holdings contribute all of them.
            top_weight = holder.Weight.iloc[:10].sum()

            new_data.loc[fund_code, date] = top_weight
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, top_weight))

    out_file = Parameter().get_read_file(factor_name)

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        data = pandas_add_row(data, new_data)
    else:
        print(" File No Exist ", factor_name)
        data = new_data
    data.to_csv(out_file)
def cal_index_exposure_period(self, index_code="000300.SH", beg_date="20031231",
                              end_date=None, period="D"):
    """Compute an index's exposure for every trade date in a period and store it.

    ``self.cal_index_exposure_date`` is called for each date returned by
    ``Date().get_trade_date_series``; the results are stacked row-wise and
    merged into ``<self.path>/Index_Barra_Exposure_<index_code>.csv`` through
    ``pandas_add_row`` (or written as a new file when none exists).

    Args:
        index_code: Index identifier, also part of the output file name.
        beg_date: First date of the period.
        end_date: Last date of the period.  ``None`` (the default) means
            today, resolved at call time.  The previous default expression
            was evaluated once when the function was defined, so a
            long-running process kept using a stale date.
        period: Sampling period forwarded to ``get_trade_date_series``.

    Returns:
        None.  When the date series is empty nothing is written (previously
        this raised UnboundLocalError).
    """
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    date_series_daily = Date().get_trade_date_series(beg_date, end_date, period=period)
    if len(date_series_daily) == 0:
        return

    # Gather every date's result first and stack once, instead of
    # re-concatenating the growing frame on each iteration.
    exposures = [self.cal_index_exposure_date(index_code, date)
                 for date in date_series_daily]
    new_data = pd.concat(exposures, axis=0)

    out_file = os.path.join(self.path, "Index_Barra_Exposure_" + index_code + '.csv')

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        data = pandas_add_row(data, new_data)
    else:
        data = new_data
    data.to_csv(out_file)
def _fill_stock_alpha_table(alpha_func, source, price, code_list, date_series, factor_name):
    """Return a date-by-stock table filled with ``alpha_func(source, price, code, date)``."""
    table = pd.DataFrame([], index=code_list, columns=date_series)
    for report_date in date_series:
        for code in code_list:
            alpha = alpha_func(source, price, code, report_date)
            # ``.ix`` was removed in pandas 1.0; both keys are axis labels.
            table.loc[code, report_date] = alpha
            print(code, report_date, factor_name, alpha)
    # Dates become rows; drop dates for which no stock has a value.
    return table.T.dropna(how="all")


def _save_stock_alpha_table(path, factor_name, new_data):
    """Write ``new_data`` to ``StockAlpha_<factor_name>.csv``, merging into an existing file."""
    filename = os.path.join(path, 'StockAlpha_' + factor_name + '.csv')
    if os.path.exists(filename):
        old_data = pd.read_csv(filename, index_col=[0], encoding='gbk')
        old_data.index = old_data.index.map(str)
        result = pandas_add_row(old_data, new_data)
    else:
        result = new_data
    result.to_csv(filename)


def GetAllStockAllDateAlpha(path, factor_name_list, date_series):
    """Compute and persist stock-level alpha for several factors and for industry.

    For each name in ``factor_name_list`` the factor is loaded from the
    "alpha_dfc" store and ``GetStockAlphaAtFactor`` is evaluated for every
    stock returned by ``Stock().get_all_stock_code_now()`` on every date in
    ``date_series``.  The same is then done once more with
    ``GetStockAlphaAtIndustry`` on the "industry_citic1" data under the
    factor name "Industry".  Each table is written to
    ``<path>/StockAlpha_<factor>.csv`` and merged with an existing file
    through ``pandas_add_row``.

    Args:
        path: Output directory; created when missing.
        factor_name_list: Factor names to process.
        date_series: Report dates to evaluate.

    Returns:
        None.
    """
    # Example arguments left by the original author:
    #   path = 'E:\\3_Data\\4_fund_data\\7_fund_select_stock\\FundSelectStockAlpha\\'
    #   factor_name_list = ["TotalMarketValue", "BP", "IncomeYOYDaily", "ROETTMDaily"]
    #   beg_date = "20170530"
    #   end_date = "20180630"
    #   date_series = Date().get_normal_date_series(beg_date, end_date, "S")

    # Parameters
    code_list = Stock().get_all_stock_code_now()

    if not os.path.exists(path):
        os.makedirs(path)

    # Read data
    industry = Stock().get_factor_h5("industry_citic1", None, "primary_mfc")
    price = Stock().get_factor_h5("PriceCloseAdjust", None, 'alpha_dfc')

    # Alpha on every requested factor
    for factor_name in factor_name_list:
        factor = Stock().get_factor_h5(factor_name, None, "alpha_dfc")
        new_data = _fill_stock_alpha_table(GetStockAlphaAtFactor, factor, price,
                                           code_list, date_series, factor_name)
        _save_stock_alpha_table(path, factor_name, new_data)

    # Alpha on industry
    factor_name = "Industry"
    new_data = _fill_stock_alpha_table(GetStockAlphaAtIndustry, industry, price,
                                       code_list, date_series, factor_name)
    _save_stock_alpha_table(path, factor_name, new_data)
# GetAllStockAllDateAlpha(path, factor_name_list, date_series)

# Stock-level alpha on the "industry_citic2" data, stored under the factor
# name "Industry2".  ``path`` and ``date_series`` must already be defined by
# the surrounding script.
code_list = Stock().get_all_stock_code_now()

if not os.path.exists(path):
    os.makedirs(path)

# Read data
industry = Stock().get_factor_h5("industry_citic2", None, "primary_mfc")
price = Stock().get_factor_h5("PriceCloseAdjust", None, 'alpha_dfc')

factor_name = "Industry2"
new_data = pd.DataFrame([], index=code_list, columns=date_series)

for report_date in date_series:
    for code in code_list:
        alpha = GetStockAlphaAtIndustry(industry, price, code, report_date)
        # ``.ix`` was removed in pandas 1.0; both keys are axis labels.
        new_data.loc[code, report_date] = alpha
        print(code, report_date, factor_name, alpha)

# Dates become rows; drop dates for which no stock has a value.
new_data = new_data.T.dropna(how="all")

filename = os.path.join(path, 'StockAlpha_' + factor_name + '.csv')
if os.path.exists(filename):
    old_data = pd.read_csv(filename, index_col=[0], encoding='gbk')
    old_data.index = old_data.index.map(str)
    result = pandas_add_row(old_data, new_data)
else:
    result = new_data
result.to_csv(filename)
def cal_fund_holder_exposure(self, fund, beg_date, end_date):
    """Compute a fund's holdings-based Barra exposure on semi-annual dates.

    For every date in the period="S" series between ``beg_date`` and
    ``end_date`` the fund's reported holdings (``Weight`` column) are aligned
    with the Barra factor exposures of the month-end trade date.  For each
    factor the exposure is ``sum(Weight * factor exposure) / 100``.  Dates
    for which either input is missing, or for which no stock has complete
    data, yield a row of NaN.  The rows are merged into
    ``Fund_Holder_Exposure_<fund>.csv`` through ``pandas_add_row`` (or
    written as a new file when none exists).

    Fixes relative to the previous version: ``.ix`` (removed in pandas 1.0)
    is replaced by ``.loc``; the weighted sum no longer assigns a column onto
    a slice of ``data``; the factor-name lookup is done once rather than per
    date; an empty date series returns early rather than raising
    UnboundLocalError.

    Args:
        fund: Fund code; also part of the output file name.
        beg_date: First date of the period.
        end_date: Last date of the period.

    Returns:
        None.
    """
    # Computed once every half year.
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
    date_series = Date().get_normal_date_series(beg_date, end_date, period='S')
    if len(date_series) == 0:
        return

    # The factor names do not depend on the date.
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

    exposure_rows = []
    for date in date_series:
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)

        barra_exposure = Barra().get_factor_exposure_date(trade_date, type_list)
        fund_holding = FundHolder().get_fund_holding_report_date_fund(fund, report_date)
        print("########## Calculate Holder Exposure %s %s ##########" % (fund, report_date))

        # Start from an all-NaN row; it is only filled when usable data exists.
        exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])

        if (barra_exposure is not None) and (fund_holding is not None):
            data = pd.concat([fund_holding['Weight'], barra_exposure], axis=1)
            data = data.dropna()

            # Keep the NaN row when nothing is left; summing an empty frame
            # would otherwise record a misleading 0.
            if len(data) > 0:
                for factor_name in barra_name:
                    stock_exposure = data['Weight'] * data[factor_name]
                    exposure_add.loc[report_date, factor_name] = stock_exposure.sum() / 100.0

        exposure_rows.append(exposure_add)

    exposure_new = pd.concat(exposure_rows, axis=0)

    # Merge new data
    out_path = Parameter().get_read_file(self.holder_exposure_name)
    out_file = os.path.join(out_path, 'Fund_Holder_Exposure_' + fund + '.csv')

    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new
    params.to_csv(out_file)
def cal_fund_regression_exposure(self, fund, beg_date, end_date, period="M"):
    """Estimate a fund's Barra exposure by bounded regression on rolling windows.

    For each trade date in the series, the fund's "Repair_Nav_Pct" values are
    regressed on the STYLE and COUNTRY Barra factor returns over the
    preceding ``self.regression_period`` trade dates.  The regression is
    posed as a quadratic program with ``P = 2 X'X`` and ``q = -2 X'y``
    (``X`` includes a constant column), with every coefficient bounded by
    +/- 1.5 except the last, which is bounded above by 0.95 and below by
    ``max(stock_ratio - 0.10, 0.75)``.  ``stock_ratio`` is the fund's
    "Stock_Ratio" factor at the last fund quarter date, divided by 100.
    Windows with no more than ``self.regression_period_min`` complete rows,
    and windows for which the solver raises, yield a row of NaN.  The rows
    are merged into ``Fund_Regression_Exposure_<fund>.csv`` through
    ``pandas_add_row`` (or written as a new file when none exists).

    NOTE(review): assumes ``sol`` is ``cvxopt.solvers``, ``matrix`` is
    ``cvxopt.matrix`` and ``sm`` is ``statsmodels.api`` - confirm against the
    file's imports.

    Fixes relative to the previous version:
      * ``.ix`` (removed in pandas 1.0) is replaced by ``reindex`` for the
        date lookup, which tolerates dates missing from ``data`` as ``.ix``
        did, and by ``.iloc`` for positional columns.
      * The lower position bound is computed per date.  Previously
        ``low_position_exposure`` was rebound inside the loop, so a high
        stock ratio on one date raised the bound for all later dates.
      * The bare ``except:`` is narrowed to ``except Exception`` and the
        failure is reported.
      * ``np.row_stack`` (deprecated alias) is replaced by ``np.vstack``.
      * An empty date series returns early rather than raising
        UnboundLocalError.

    Args:
        fund: Fund code; also part of the output file name.
        beg_date: First date of the period.
        end_date: Last date of the period.
        period: Sampling period forwarded to ``get_trade_date_series``.

    Returns:
        None.
    """
    # Parameters
    up_style_exposure = 1.5
    up_position_exposure = 0.95
    low_position_exposure = 0.75
    position_sub = 0.10

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # Fetch data
    type_list = ['STYLE', 'COUNTRY']

    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
    barra_return = Barra().get_factor_return(None, None, type_list)

    date_series = Date().get_trade_date_series(beg_date, end_date, period=period)
    fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None, [fund])

    data = pd.concat([fund_return, barra_return], axis=1)
    data = data.dropna()
    print(" Fund Code Total Len %s " % len(data))
    factor_number = len(barra_name)

    if len(date_series) == 0:
        return

    # The inequality matrix (x <= h_up, -x <= h_low) is the same for every date.
    identity = np.diag(np.ones(factor_number + 1))
    G = matrix(np.vstack((identity, -identity)))
    style_bounds = np.ones((factor_number, 1)) * up_style_exposure

    # Loop: regress to obtain the exposure on each date.
    params_rows = []
    for period_end_date in date_series:
        period_beg_date = Date().get_trade_date_offset(period_end_date, -self.regression_period)
        period_date_series = Date().get_trade_date_series(period_beg_date, period_end_date)

        # reindex yields NaN rows for dates absent from ``data``; dropna removes them.
        data_periods = data.reindex(period_date_series)
        data_periods = data_periods.dropna()

        quarter_date = Date().get_last_fund_quarter_date(period_end_date)
        stock_ratio = (FundFactor().get_fund_factor(
            "Stock_Ratio", [quarter_date], [fund]) / 100).values[0][0]
        print("########## Calculate Regression Exposure %s %s %s %s %s %s ##########" %
              (fund, period_beg_date, period_end_date, quarter_date, len(data_periods), stock_ratio))

        # Default result: an all-NaN row, replaced only by a successful solve.
        params_add = pd.DataFrame([], columns=[period_end_date], index=barra_name).T

        if len(data_periods) > self.regression_period_min:
            # First column is the fund return, the rest are factor returns.
            y = data_periods.iloc[:, 0].values
            x = data_periods.iloc[:, 1:].values
            x_add = sm.add_constant(x)

            # Per-date lower bound; must not carry over to later dates.
            low_bound = max(stock_ratio - position_sub, low_position_exposure)
            print(low_bound)

            P = matrix(2 * np.dot(x_add.T, x_add))
            Q = matrix(-2 * np.dot(x_add.T, y))

            h_up = np.vstack((style_bounds, np.array([up_position_exposure])))
            h_low = np.vstack((style_bounds, np.array([-low_bound])))
            h = matrix(np.vstack((h_up, h_low)))

            try:
                result = sol.qp(P, Q, G, h)
                # Drop the first solution entry (the constant term).
                params_add = pd.DataFrame(np.array(result['x'][1:]),
                                          columns=[period_end_date], index=barra_name).T
            except Exception as exc:
                # Best effort: keep the NaN row, but report why.
                print(" Regression Failed %s %s %s " % (fund, period_end_date, exc))

        print(params_add)
        params_rows.append(params_add)

    params_new = pd.concat(params_rows, axis=0)

    # Merge new data
    out_path = Parameter().get_read_file(self.regression_exposure_name)
    out_file = os.path.join(out_path, 'Fund_Regression_Exposure_' + fund + '.csv')

    if os.path.exists(out_file):
        params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        params_old.index = params_old.index.map(str)
        params = pandas_add_row(params_old, params_new)
    else:
        params = params_new
    print(params)
    params.to_csv(out_file)