def check_alpha_factor_update_date(self):
    """
    Report the date coverage and last-day completeness of every alpha factor.

    For each factor name returned by
    ``AlphaFactor().get_all_alpha_factor_name()`` the exposure table is
    loaded and columns that are entirely NaN are dropped.  The first and
    last remaining column labels are recorded together with, for the last
    column, the number of non-null values, the number of rows and their
    ratio.  If anything fails for a factor, the exception is printed and
    every field of that factor is set to an empty string.

    The summary table is written to ``AlphaFactorUpdateDate.xlsx`` under
    ``self.data_path``.
    """
    stat_columns = ['开始日期', '结束日期', "最后一天有效数据个数",
                    "最后一天股票个数", "最后一天有效数据比率"]

    factor_name_list = AlphaFactor().get_all_alpha_factor_name()
    # Only the two date columns exist up front; the remaining statistics
    # columns are created on first assignment inside the loop.
    result = pd.DataFrame([], columns=stat_columns[:2], index=factor_name_list)

    for factor_name in factor_name_list:
        try:
            print("######### 检查更新日期 %s 数据 ############" % factor_name)
            exposure = AlphaFactor().get_alpha_factor_exposure(factor_name)
            # Drop columns that contain no data at all.
            exposure = exposure.T.dropna(how='all').T

            first_label = exposure.columns[0]
            last_label = exposure.columns[-1]
            last_column = exposure.iloc[:, -1]
            valid_count = last_column.count()
            stock_count = len(last_column)
            row = (first_label, last_label, valid_count, stock_count,
                   valid_count / stock_count)

            for column, value in zip(stat_columns, row):
                result.loc[factor_name, column] = value
        except Exception as e:
            print(e)
            # Best effort: blank out the whole row and move on.
            for column in stat_columns:
                result.loc[factor_name, column] = ""
            print("########### %s 检查更新数据 为空 !!!###########" % factor_name)

    out_file = os.path.join(self.data_path, "AlphaFactorUpdateDate.xlsx")
    we = WriteExcel(out_file)
    ws = we.add_worksheet("更新数据")

    # One-row frame of Excel number formats: integers everywhere, percent
    # for the ratio column.
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0'
    num_format_pd.loc['format', ['最后一天有效数据比率']] = '0.00%'

    we.write_pandas(result, ws, begin_row_number=0, begin_col_number=1,
                    num_format_pd=num_format_pd, color="blue", fillna=True)
    we.close()
def cal_cma_factor_pct(self):
    """
    Compute the "CMA" return series: the return of high asset-growth stocks
    in excess of low asset-growth stocks, using three equal-sized groups.

    For every pair of consecutive dates shared by the ``alpha_raw_asset_yoy``
    exposure table and the ``Pct_chg`` table, stocks are ranked by the
    previous date's asset growth.  The mean current-date return of the
    bottom third is subtracted from that of the top third.  The series and
    its cumulative sum are written to
    ``factor_return/FactorReturn_CMA.csv`` under ``self.data_path``.
    """
    name = "CMA"
    asset_yoy = AlphaFactor().get_alpha_factor_exposure("alpha_raw_asset_yoy")
    stock_pct = Stock().read_factor_h5("Pct_chg")

    date_series = sorted(set(stock_pct.columns) & set(asset_yoy.columns))
    result = pd.DataFrame([], index=date_series, columns=[name])

    # Walk (previous date, current date) pairs.
    for last_date, date in zip(date_series[:-1], date_series[1:]):
        sample = pd.concat([asset_yoy[last_date], stock_pct[date]], axis=1)
        sample = sample.dropna()
        sample.columns = ['LastAssetYOY', 'Return']
        group_size = len(sample) // 3

        ascending = sample.sort_values(by=['LastAssetYOY'])
        low_mean = ascending.loc[ascending.index[:group_size], 'Return'].mean()

        # Re-sort the already ascending frame, as the original flow does.
        descending = ascending.sort_values(by=['LastAssetYOY'], ascending=False)
        high_mean = descending.loc[descending.index[:group_size], 'Return'].mean()

        result.loc[date, name] = high_mean - low_mean

    result = result.dropna()
    result['CumSumReturn'] = result[name].cumsum()
    out_file = os.path.join(self.data_path, 'factor_return',
                            'FactorReturn_%s.csv' % name)
    result.to_csv(out_file)
def cal_mfc_holding_alpha_exposure_date(fund_name, factor_name_list, date):
    """
    Compute a fund's holding-weighted exposure to alpha factors on one date
    (weights are normalised to sum to one, i.e. fully invested).

    Parameters
    ----------
    fund_name : value compared against the "基金名称" column of the holdings.
    factor_name_list : iterable of alpha factor names to evaluate.
    date : date passed through ``Date().get_trade_date_offset(date, 0)``
        before use.

    Returns
    -------
    pandas.DataFrame
        One row (indexed by the adjusted date) and one column per factor.
        Cells stay empty when the total weight is not positive, and the
        whole row is empty when any step raises.
    """
    # Kept as a function-level import, as in the original.
    # NOTE(review): presumably avoids an import cycle - confirm.
    from quant.project.multi_factor.alpha_model.exposure.alpha_factor import AlphaFactor

    date = Date().get_trade_date_offset(date, 0)

    try:
        holding_data = MfcGetData().get_fund_security(date)
        holding_data = holding_data[["基金名称", "证券代码", "市值", '证券类别']]
        is_fund = holding_data["基金名称"] == fund_name
        is_stock = holding_data['证券类别'] == "股票"
        # Explicit copy: the columns below are assigned on this frame, which
        # would otherwise be a slice of the original holdings table.
        holding_data = holding_data[is_fund & is_stock].copy()
        holding_data.columns = ["FundName", "StockCode", "Weight", 'Type']
        holding_data['Weight'] = holding_data['Weight'] / holding_data['Weight'].sum()
        holding_data['StockCode'] = holding_data['StockCode'].map(
            CodeFormat().stock_code_add_postfix)
        holding_data.index = holding_data['StockCode']

        # Collect one exposure column per factor and join them once.
        exposure_columns = []
        for factor_name in factor_name_list:
            alpha = AlphaFactor().get_alpha_factor_exposure(factor_name)
            exposure_add = pd.DataFrame(alpha[date])
            exposure_add.columns = [factor_name]
            exposure_columns.append(exposure_add)
        if exposure_columns:
            exposure = pd.concat(exposure_columns, axis=1)
        else:
            exposure = pd.DataFrame()

        data = pd.concat([holding_data, exposure], axis=1)
        data = data.sort_values(by=['Weight'], ascending=True)
        # Keep only rows that are actually held by the fund.
        data = data.dropna(subset=["Weight"])

        res = pd.DataFrame([], columns=factor_name_list, index=[date])
        total_weight = data['Weight'].sum()

        if total_weight > 0.0:
            for factor_name in exposure.columns:
                # Stocks without a factor value are left out of the numerator
                # but still count in the denominator (total_weight).
                data_factor = data[["Weight", factor_name]].dropna()
                exposure_sum = (data_factor["Weight"] * data_factor[factor_name]).sum()
                res.loc[date, factor_name] = exposure_sum / total_weight
            print(" Calculate Mfcteda Fund %s Alpha Exposure at %s" % (fund_name, date))
        else:
            print(" Calculate Mfcteda Fund %s At %s of Weight Stock is Zero" % (fund_name, date))
        return res

    except Exception as e:
        # Best effort: report the cause and fall back to an empty row.
        print(e)
        print(" Calculate Mfcteda Fund %s Alpha Exposure at %s is Null " % (fund_name, date))
        res = pd.DataFrame([], columns=factor_name_list, index=[date])
        return res
def ew_to_all_major_alpha(self, stock_pool_name, beg_date, end_date, period):
    """
    Build and save the equal-weight combined factor of every major category.

    The table from ``AlphaFactor().get_all_alpha_factor_file()`` is grouped
    by its index value (the major category).  For each category the names
    in its "因子名" column are passed to ``self.ew_to_major_alpha`` and the
    result is stored with ``AlphaSplit().save_alpha_res_exposure``.

    The original note says factors with poor ICIR or fast-changing values
    are excluded; that filtering is not visible in this method.
    """
    factor_table = AlphaFactor().get_all_alpha_factor_file()

    for major_factor in list(set(factor_table.index)):
        members = factor_table[factor_table.index == major_factor]
        member_names = list(members["因子名"].values)
        combined = self.ew_to_major_alpha(member_names, stock_pool_name,
                                          beg_date, end_date, period)
        AlphaSplit().save_alpha_res_exposure(combined, major_factor, stock_pool_name)
def ew_to_alpha(self, stock_pool_name):
    """
    Equal-weight the major-category factors into a single "alpha" exposure.

    The residual exposure of every name returned by
    ``AlphaFactor().get_major_alpha_name()`` is loaded for the given stock
    pool, the element-wise mean across categories is taken (missing values
    are skipped), and the result is saved under the name "alpha".

    ``pd.Panel`` was removed from pandas, so the mean is computed by
    stacking the tables with a positional key and averaging rows that share
    the same original index label.
    NOTE(review): assumes each exposure is a two-dimensional DataFrame with
    unique row labels - confirm against ``get_alpha_res_exposure``.
    """
    major_factor_list = AlphaFactor().get_major_alpha_name()
    exposures = [
        AlphaSplit().get_alpha_res_exposure(major_factor, stock_pool_name)
        for major_factor in major_factor_list
    ]

    if exposures:
        # Level 0 is the position of the category, level 1 the original
        # row label; averaging over level 1 gives the cross-category mean.
        stacked = pd.concat(exposures, keys=range(len(exposures)))
        result_mean = stacked.groupby(level=1).mean()
    else:
        # pd.concat rejects an empty list; keep saving an empty table.
        result_mean = pd.DataFrame()

    AlphaSplit().save_alpha_res_exposure(result_mean, "alpha", stock_pool_name)
def alpha_pool(self, beg_date, end_date, period, stock_pool_name):
    """
    Refresh the alpha factor statistics for one stock pool.

    Factor returns and summaries are recomputed from "20040101" up to
    ``end_date``, first for the major-category factors and then for the
    combined "alpha" factor; finally the summaries are concatenated.
    ``beg_date`` is only used by the steps that are currently disabled.
    """
    # Disabled upstream steps, kept for reference:
    # AlphaSplit().split_alpha_all(beg_date, end_date, "D", stock_pool_name)
    # factor_name_list = AlphaFactor().get_all_alpha_factor_name()
    # AlphaSummary().cal_all_factor_return("20040101", end_date, factor_name_list, period, stock_pool_name, 1)
    # AlphaSummary().cal_all_factor_summary("20040101", end_date, factor_name_list, period, stock_pool_name, 1)
    # AlphaConcat().ew_to_all_major_alpha(stock_pool_name, beg_date, end_date, period)
    # AlphaConcat().ew_to_alpha(stock_pool_name)  (ran between the two groups below)

    major_names = AlphaFactor().get_major_alpha_name()

    # Same pair of calculations for the major factors, then for "alpha".
    for factor_names in (major_names, ["alpha"]):
        AlphaSummary().cal_all_factor_return(
            "20040101", end_date, factor_names, period, stock_pool_name, 1)
        AlphaSummary().cal_all_factor_summary(
            "20040101", end_date, factor_names, period, stock_pool_name, 1)

    AlphaSummary().concat_summary(stock_pool_name)
def split_alpha_all(self, beg_date, end_date, period="W",
                    stock_pool_name="AllChinaStockFilter", force=1):
    """
    Run ``self.split_alpha`` for every alpha factor.

    Parameters
    ----------
    beg_date, end_date, period, stock_pool_name :
        Forwarded unchanged to ``self.split_alpha``.
    force : when equal to 1 every factor is recomputed; otherwise a factor
        is skipped if its ``.h5`` result file already exists.
    """
    alpha_factor_list = AlphaFactor().get_all_alpha_factor_name()

    # The directory string contains a literal backslash.  It is written as
    # an explicit escape because "\h" is an invalid escape sequence; the
    # runtime value is unchanged.
    # NOTE(review): on non-Windows systems the backslash is not a path
    # separator - confirm this matches the path used when the file is saved.
    hdf_res_path = os.path.join(self.data_path, stock_pool_name, "res_alpha\\hdf")

    for alpha_name in alpha_factor_list:
        file = os.path.join(hdf_res_path, alpha_name + '.h5')
        if force == 1 or not os.path.exists(file):
            self.split_alpha(beg_date, end_date, alpha_name, period, stock_pool_name)
        else:
            print("%s Already Exist" % alpha_name)
def cal_factor_exposure(self, beg_date, end_date, index_code):
    """
    Compute the indicator values and write them to a CSV file.

    The "alpha_raw_roe" exposure table is loaded, rows with any missing
    value are dropped, and three columns are added: ``DiffRatio``
    (``Diff`` divided by ``CLOSE``), ``RawTimer`` (a copy of ``DiffRatio``)
    and ``Timer`` (``self.score_average_diff`` applied to each ratio).
    The table is saved as ``exposure/<factor_name>_<index_code>.csv`` under
    ``self.data_path``.
    """
    # Window lengths; not referenced anywhere below.
    short_term = 5
    long_term = 90

    data = AlphaFactor().get_alpha_factor_exposure("alpha_raw_roe")
    # These two lookups are executed but their results are not used below.
    weight = Index().get_weight(index_code)
    date_series = set(Date().get_trade_date_offset(beg_date, end_date))

    data = data.dropna()
    diff_ratio = data['Diff'] / data['CLOSE']
    data['DiffRatio'] = diff_ratio
    data['RawTimer'] = diff_ratio
    data['Timer'] = diff_ratio.map(self.score_average_diff)

    out_name = '%s_%s.csv' % (self.factor_name, index_code)
    file = os.path.join(self.data_path, 'exposure', out_name)
    data.dropna(how="all").to_csv(file)
for i in range(1, len(concat_data.columns)): col = concat_data.columns[i] corr.loc[col, 'Corr'] = concat_data.iloc[:, 0].corr(concat_data.loc[:, col]) return corr if __name__ == "__main__": # Data ########################################################################################## from quant.stock.barra import Barra from quant.project.multi_factor.alpha_model.exposure.alpha_factor import AlphaFactor name = 'alpha_raw_ep' date = "20171229" data_pandas = AlphaFactor().get_alpha_factor_exposure(name) factor_series = data_pandas[date] neutral_frame = Barra().get_factor_exposure_date( date, type_list=['STYLE', 'INDUSTRY']) params, t_values, factor_res = FactorNeutral().factor_exposure_neutral( factor_series, neutral_frame) print(params) print(factor_res) print(t_values) ##########################################################################################
def split_alpha(self, beg_date, end_date, factor_name, period="W",
                stock_pool_name="AllChinaStockFilter"):
    """
    Compute the residual alpha of one factor and its risk-factor loadings.

    On every date shared by the requested trade-date series, the factor
    table and the Barra exposure dates, the standardised factor values of
    the stocks in the pool are regressed (OLS) on the Barra STYLE, INDUSTRY
    and COUNTRY exposures.  The regression coefficients are collected as
    the factor's exposure to those risk factors, and the residuals as the
    residual alpha.  Dates with no more than ``self.min_stock_number``
    complete observations are skipped.

    The residual alpha is passed through
    ``FactorPreProcess().remove_extreme_value_mad`` and
    ``FactorPreProcess().standardization`` before both tables are saved.
    """
    alpha = AlphaFactor().get_standard_alpha_factor(factor_name)
    trade_dates = Date().get_trade_date_series(beg_date, end_date, period=period)
    barra_dates = Barra().get_exposure_date_series()
    date_series = sorted(set(trade_dates) & set(alpha.columns) & set(barra_dates))

    res_alpha = pd.DataFrame()
    exposure_risk = pd.DataFrame()

    for date in date_series:
        alpha_date = pd.DataFrame(alpha[date])
        alpha_date.columns = ['Alpha']
        alpha_date = alpha_date.dropna()

        risk_exposure = Barra().get_factor_exposure_date(
            date, type_list=['STYLE', 'INDUSTRY', "COUNTRY"])
        pool = Stock().get_invest_stock_pool(
            date=date, stock_pool_name=stock_pool_name)

        # Stocks that are in the pool and have both alpha and risk data.
        common = sorted(set(pool) & set(risk_exposure.index) & set(alpha_date.index))

        concat_data = pd.concat(
            [alpha_date.loc[common, "Alpha"], risk_exposure.loc[common, :]],
            axis=1)
        concat_data = concat_data.dropna()

        if len(concat_data) <= self.min_stock_number:
            print("Split %s Alpha Exposure At %s %s is Null" %
                  (factor_name, date, stock_pool_name))
            continue

        factor_val = concat_data.iloc[:, 0]
        neutral_val = concat_data.iloc[:, 1:]
        print("Split %s Alpha Exposure At %s %s" %
              (factor_name, date, stock_pool_name))

        regress = sm.OLS(factor_val.values, neutral_val.values).fit()

        # One column per date: coefficients and residuals respectively.
        params = pd.DataFrame(regress.params, index=neutral_val.columns,
                              columns=[date])
        factor_res = factor_val - regress.predict(neutral_val)
        res_alpha_date = factor_res.to_frame(name=date)

        exposure_risk = pd.concat([exposure_risk, params], axis=1)
        res_alpha = pd.concat([res_alpha, res_alpha_date], axis=1)

    res_alpha = FactorPreProcess().remove_extreme_value_mad(res_alpha)
    res_alpha = FactorPreProcess().standardization(res_alpha)
    self.save_alpha_risk_exposure(exposure_risk, factor_name, stock_pool_name)
    self.save_alpha_res_exposure(res_alpha, factor_name, stock_pool_name)