def cal_factor_barra_size(beg_date, end_date):
    """
    Barra CNE5 SIZE factor: natural log of total market value
    (unadjusted close price * total shares).

    Writes the raw factor and the winsorized/standardized factor to the
    'barra_risk_dfc' HDF5 store and returns the normalized factor
    (stocks x dates).
    """
    # param
    #################################################################################
    raw_factor_name = 'RAW_CNE5_SIZE'
    factor_name = 'NORMAL_CNE5_SIZE'
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    #################################################################################
    price_unadjust = Stock().get_factor_h5("Price_Unadjust", None, "primary_mfc")
    total_share = Stock().get_factor_h5("TotalShare", None, "primary_mfc")
    price_unadjust = price_unadjust.ix[:, beg_date:end_date]
    total_share = total_share.ix[:, beg_date:end_date]

    # calculate data
    #################################################################################
    [price_unadjust, total_share] = FactorPreProcess().make_same_index_columns(
        [price_unadjust, total_share])
    # BUG FIX: multiply by the date-sliced, index-aligned total_share.
    # The original multiplied by the full, un-sliced share frame, silently
    # ignoring the alignment done just above.
    total_market_value = price_unadjust.mul(total_share)
    log_size_data = np.log(total_market_value)

    # save data
    #################################################################################
    Stock().write_factor_h5(log_size_data, raw_factor_name, 'barra_risk_dfc')
    log_size_data = FactorPreProcess().remove_extreme_value_mad(log_size_data)
    log_size_data = FactorPreProcess().standardization_free_mv(log_size_data)
    Stock().write_factor_h5(log_size_data, factor_name, 'barra_risk_dfc')
    return log_size_data
def cal_factor_liquidity_stom(beg_date, end_date):
    """
    Barra CNE5 LIQUIDITY_STOM: log of the sum of daily turnover over the
    last 21 trading days.

    Writes the raw factor and the winsorized/standardized factor to the
    'barra_risk_dfc' HDF5 store and returns the normalized factor
    (stocks x dates).
    """
    # params
    ##################################################################################
    raw_factor_name = "RAW_CNE5_LIQUIDITY_STOM"
    factor_name = "NORMAL_CNE5_LIQUIDITY_STOM"
    M = 21  # one-month lookback in trading days

    # read data
    ##################################################################################
    turnover_daily = Stock().get_factor_h5("TurnOver_Daily", None, 'primary_mfc').T
    # CONSISTENCY FIX: normalize beg_date to a string like every sibling
    # factor function does, so the label-based .ix slices below behave
    # predictably regardless of the caller's date type.
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    # extend the window back M trade days so the first rolling sum is complete
    data_beg_date = Date().get_trade_date_offset(beg_date, -M)
    turnover_daily = turnover_daily.ix[data_beg_date:end_date, :]
    turnover_month = turnover_daily.rolling(window=M).sum().applymap(np.log)
    turnover_month = turnover_month.ix[beg_date:end_date, :]
    # zero turnover sums produce -inf after log; treat them as missing
    turnover_month = turnover_month.replace(-np.inf, np.nan)
    turnover_month = turnover_month.replace(np.inf, np.nan)
    turnover_month = turnover_month.dropna(how='all').T

    # save data
    ##################################################################################
    Stock().write_factor_h5(turnover_month, raw_factor_name, 'barra_risk_dfc')
    turnover_month = FactorPreProcess().remove_extreme_value_mad(turnover_month)
    turnover_month = FactorPreProcess().standardization_free_mv(turnover_month)
    Stock().write_factor_h5(turnover_month, factor_name, 'barra_risk_dfc')
    ##################################################################################
    return turnover_month
def cal_factor_barra_momentum(beg_date=None, end_date=None):
    """
    Barra CNE5 MOMENTUM factor: exponentially-weighted sum of daily log
    returns over a T=504 day window that ends L=21 days before the
    calculation date (long-term momentum excluding the most recent month).

    Writes the raw factor and the winsorized/standardized factor to the
    'barra_risk_dfc' HDF5 store and returns the normalized factor
    (stocks x dates).
    """
    # params
    ######################################################################################
    raw_factor_name = 'RAW_CNE5_MOMENTUM'
    factor_name = "NORMAL_CNE5_MOMENTUM"
    L = 21           # lag: skip the most recent trading month
    T = 504          # lookback window in trading days
    half_life = 126  # half-life of the exponential weights
    Min_T = 400      # minimum number of valid days required to compute a value

    # read data
    #################################################################################
    pct = Stock().get_factor_h5("Pct_chg", None, 'primary_mfc').T
    # convert simple percent returns to log returns (kept in percent units)
    pct = np.log(pct / 100.0 + 1.0) * 100
    if beg_date is None:
        beg_date = pct.index[0]
    if end_date is None:
        end_date = pct.index[-1]

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    res_data = pd.DataFrame([], index=date_series, columns=pct.columns)

    for i_index in range(len(date_series)):
        current_date = date_series[i_index]
        data_end = Date().get_trade_date_offset(current_date, -L + 1)
        data_beg = Date().get_trade_date_offset(current_date, -L - T + 2)
        pct_period = pct.ix[data_beg:data_end, :]
        pct_period = pct_period.dropna(how='all')
        if len(pct_period) > Min_T:
            print('Calculating Barra Risk factor %s at date %s' % (factor_name, current_date))
            weight = exponential_weight(len(pct_period), half_life)
            weight_mat = np.tile(np.row_stack(weight), (1, len(pct_period.columns)))
            weight_pd = pd.DataFrame(weight_mat, index=pct_period.index, columns=pct_period.columns)
            pct_weight = pct_period.mul(weight_pd)
            # skipna=False: any stock with a missing return in the window gets NaN
            res_data.ix[current_date, :] = pct_weight.sum(skipna=False)
        else:
            print('Calculating Barra Risk factor %s at date %s is null' % (factor_name, current_date))

    res_data = res_data.dropna(how='all').T
    Stock().write_factor_h5(res_data, raw_factor_name, 'barra_risk_dfc')
    res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
    res_data = FactorPreProcess().standardization_free_mv(res_data)
    Stock().write_factor_h5(res_data, factor_name, 'barra_risk_dfc')
    # CONSISTENCY FIX: return the normalized factor like the other
    # cal_factor_* functions in this file (original implicitly returned None).
    return res_data
def cal_factor_barra_cube_size(beg_date=None, end_date=None):
    """
    Barra USE4 non-linear size factor: the cube of the SIZE factor is
    regressed on SIZE each day and the OLS residual is kept, then
    winsorized and standardized.

    Writes the normalized factor to the 'barra_risk_dfc' HDF5 store and
    returns it (stocks x dates).
    """
    # params
    ##########################################################################
    factor_name = "NORMAL_CNE5_CUBE_SIZE"
    size_data = Stock().get_factor_h5("NORMAL_CNE5_SIZE", None, 'barra_risk_dfc').T
    # NAMING FIX: this is the CUBE of size (**3); the original local name
    # 'square_size_data' was misleading.
    cube_size_data = size_data ** 3
    if beg_date is None:
        beg_date = size_data.index[0]
    if end_date is None:
        end_date = size_data.index[-1]
    date_series = Date().get_trade_date_series(beg_date, end_date)
    res_data = pd.DataFrame([], index=date_series, columns=size_data.columns)

    # calculate everyday
    ##########################################################################
    for i_index in range(len(date_series)):
        current_date = date_series[i_index]
        if current_date in list(cube_size_data.index):
            print('Calculating Barra Risk factor %s at date %s' % (factor_name, current_date))
            regression_data = pd.concat([
                size_data.ix[current_date, :],
                cube_size_data.ix[current_date, :]
            ], axis=1)
            regression_data.columns = ['x', 'y']
            regression_data = regression_data.dropna()
            y = regression_data['y'].values
            x = regression_data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            # BUG FIX: write by the date label instead of the loop counter;
            # mixing positional and label .ix indexing on the same frame is
            # ambiguous and fragile (same row here, but only by accident).
            res_data.ix[current_date, :] = regression_data['res']
        else:
            print('Calculating Barra Risk factor %s at date %s is null' % (factor_name, current_date))

    res_data = res_data.T.dropna(how='all')
    res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
    res_data = FactorPreProcess().standardization_free_mv(res_data)
    Stock().write_factor_h5(res_data, factor_name, 'barra_risk_dfc')
    # CONSISTENCY: return the normalized factor like the sibling functions
    return res_data
def PriceHighAdjust(beg_date, end_date):
    """
    Adjusted daily high price: unadjusted high multiplied by the
    cumulative adjustment factor.

    Saves the result to the 'alpha_dfc' HDF5 store and returns it
    (stocks x dates).
    """
    # param
    #################################################################################
    factor_name = "PriceHighAdjust"
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    #################################################################################
    high_raw = Stock().get_factor_h5("PriceHighUnadjust", None, "primary_mfc")
    adjust_factor = Stock().get_factor_h5("AdjustFactor", None, "primary_mfc")
    high_raw = high_raw.ix[:, beg_date:end_date]
    adjust_factor = adjust_factor.ix[:, beg_date:end_date]

    # calculate data
    #################################################################################
    high_raw, adjust_factor = FactorPreProcess().make_same_index_columns(
        [high_raw, adjust_factor])
    price_adjust = high_raw * adjust_factor

    # save data
    #############################################################################
    Stock().write_factor_h5(price_adjust, factor_name, "alpha_dfc")
    return price_adjust
def TotalMarketValue(beg_date, end_date):
    """
    Total market value of each stock: total shares * unadjusted close price.

    Saves the result to the 'alpha_dfc' HDF5 store and returns it
    (stocks x dates).
    """
    # param
    #################################################################################
    factor_name = "TotalMarketValue"
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    #################################################################################
    close_raw = Stock().get_factor_h5("Price_Unadjust", None, "primary_mfc")
    total_shares = Stock().get_factor_h5("TotalShare", None, "primary_mfc")
    close_raw = close_raw.ix[:, beg_date:end_date]
    total_shares = total_shares.ix[:, beg_date:end_date]

    # calculate data
    #################################################################################
    close_raw, total_shares = FactorPreProcess().make_same_index_columns(
        [close_raw, total_shares])
    market_value = close_raw * total_shares

    # save data
    ################################################################################
    Stock().write_factor_h5(market_value, factor_name, "alpha_dfc")
    return market_value
def cal_factor_barra_book_to_price(beg_date, end_date):
    """
    Barra CNE5 BOOK_TO_PRICE factor: total shareholder equity divided by
    total market value, with quarterly report values expanded to a daily
    series using disclosure dates.

    Writes the raw factor and the winsorized/standardized factor to the
    'barra_risk_dfc' HDF5 store and returns the normalized factor.
    """
    # param
    #################################################################################
    raw_factor_name = 'RAW_CNE5_BOOK_TO_PRICE'
    factor_name = "NORMAL_CNE5_BOOK_TO_PRICE"

    # read data
    #################################################################################
    equity = Stock().get_factor_h5("TotalShareHoldeRequity", None, "primary_mfc")
    market_value = Stock().get_factor_h5("TotalMarketValue", None, "alpha_dfc")

    # data precessing: expand quarterly equity to daily by disclosure date
    #################################################################################
    report_data = Stock().get_factor_h5("OperatingIncome" + "Daily", "ReportDate", 'primary_mfc')
    equity = StockFactorOperate().change_quarter_to_daily_with_disclosure_date(
        equity, report_data, beg_date, end_date)
    [equity, market_value] = Stock().make_same_index_columns([equity, market_value])
    book_to_price = equity.div(market_value)
    pb_data = book_to_price.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(pb_data, raw_factor_name, 'barra_risk_dfc')
    pb_data = FactorPreProcess().remove_extreme_value_mad(pb_data)
    pb_data = FactorPreProcess().standardization_free_mv(pb_data)
    Stock().write_factor_h5(pb_data, factor_name, 'barra_risk_dfc')
    return pb_data
def cal_factor_alpha_return(factor_name, beg_date, end_date, cal_period):
    """
    Backtest one alpha factor: per-period factor returns, IC, and
    group (quantile) returns, with optional industry and Barra-style
    neutralization, then write exposure/stability/return reports to disk.

    Parameters
    ----------
    factor_name : name of the alpha factor in the 'alpha_dfc' HDF5 store
    beg_date, end_date : backtest date range (clipped to available data)
    cal_period : rebalancing period passed to Date().get_trade_date_series

    Side effects: writes three files under out_path (exposure CSV,
    exposure-correlation CSV, FactorReturn xlsx). Returns None.
    """
    # param
    ###############################################################################################################
    ###############################################################################################################
    group_number = 8
    year_trade_days = 242
    min_stock_number = 100
    out_path = 'E:\\3_Data\\5_stock_data\\3_alpha_model\\'
    alpha_remove_extreme_value = True  # winsorize the alpha factor (MAD)
    alpha_standard = True  # standardize the alpha factor
    alpha_industry_neutral = True  # neutralize against industry dummies
    alpha_barra_style_neutral = True  # neutralize against Barra style factors

    # read data
    ###############################################################################################################
    ###############################################################################################################
    price = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")
    alpha_val = Stock().get_factor_h5(factor_name, None, "alpha_dfc")
    industry = Stock().get_factor_h5("industry_citic1", None, "primary_mfc")
    # industry codes are stored as bytes; decode for get_dummies
    industry = industry.applymap(lambda x: x.decode('utf-8'))
    [alpha_val, industry] = FactorPreProcess().make_same_index_columns([alpha_val, industry])

    if alpha_barra_style_neutral:
        size = Stock().get_factor_h5("NORMAL_CNE5_SIZE", None, 'barra_risk_dfc')
        beta = Stock().get_factor_h5("NORMAL_CNE5_BETA", None, 'barra_risk_dfc')
        nolin_size = Stock().get_factor_h5("NORMAL_CNE5_NON_LINEAR_SIZE", None, 'barra_risk_dfc')
        momentum = Stock().get_factor_h5("NORMAL_CNE5_MOMENTUM", None, 'barra_risk_dfc')
        # NOTE(review): momentum is NOT included in the alignment call below —
        # confirm its index/columns are guaranteed to match upstream.
        [size, beta, nolin_size] = FactorPreProcess().make_same_index_columns(
            [size, beta, nolin_size])
        beg_date = max(beg_date, price.columns[0], alpha_val.columns[0], beta.columns[0])
        end_date = min(end_date, price.columns[-1], alpha_val.columns[-1], beta.columns[-1])
    else:
        beg_date = max(beg_date, price.columns[0], alpha_val.columns[0])
        end_date = min(end_date, price.columns[-1], alpha_val.columns[-1])

    date_series = Date().get_trade_date_series(beg_date, end_date, period=cal_period)
    # keep only rebalance dates where the alpha factor actually has data
    date_series = list(set(date_series) & set(alpha_val.columns))
    date_series.sort()

    # pre process data
    ###############################################################################################################
    ###############################################################################################################
    if alpha_remove_extreme_value:
        alpha_val = FactorPreProcess().remove_extreme_value_mad(alpha_val)
    if alpha_standard:
        alpha_val = FactorPreProcess().standardization(alpha_val)

    # cal everyday
    ###############################################################################################################
    ###############################################################################################################
    alpha_return = pd.DataFrame([], index=date_series)
    alpha_exposure = pd.DataFrame([], index=date_series, columns=price.index)

    # the last two dates are skipped: each period needs a next rebalance
    # date plus one more day to price the sell
    for i_date in range(len(date_series) - 2):
        cur_cal_date = date_series[i_date]
        next_cal_date = date_series[i_date + 1]
        # trade one day after each calculation date
        buy_date = Date().get_trade_date_offset(cur_cal_date, 1)
        sell_date = Date().get_trade_date_offset(next_cal_date, 1)
        print(" Calculating Factor %s Alpha Return At %s" % (factor_name, cur_cal_date))
        alpha_return.index.name = 'CalDate'
        alpha_return.ix[cur_cal_date, "BuyDate"] = buy_date
        alpha_return.ix[cur_cal_date, "SellDate"] = sell_date
        alpha_date = alpha_val[cur_cal_date]
        buy_price = price[buy_date]
        sell_price = price[sell_date]
        pct_date = sell_price / buy_price - 1.0

        if alpha_industry_neutral:
            # NOTE(review): bare except silently skips dates where the
            # industry column is missing — consider catching KeyError only.
            try:
                industry_date = industry[cur_cal_date]
                industry_dummy = pd.get_dummies(industry_date)
            except:
                continue
            if len(pd.concat([alpha_date, industry_date], axis=1).dropna()) < min_stock_number:
                continue
            else:
                # residual of alpha regressed on industry dummies
                params, factor_res = factor_neutral(
                    factor_series=alpha_date, neutral_frame=industry_dummy)
                alpha_date = factor_res
                alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
                alpha_date = FactorPreProcess().standardization(alpha_date)

        if alpha_barra_style_neutral:
            try:
                size_date = size[cur_cal_date]
                beta_date = beta[cur_cal_date]
                nolin_size_date = nolin_size[cur_cal_date]
                momentum_date = momentum[cur_cal_date]
            except:
                continue
            if len(pd.concat([alpha_date, size_date], axis=1).dropna()) < min_stock_number:
                continue
            else:
                # residual of alpha regressed on the four Barra style exposures
                barra_risk_exposure = pd.concat(
                    [beta_date, size_date, nolin_size_date, momentum_date], axis=1)
                barra_risk_exposure.columns = [
                    'beta', 'size', 'nolin_size', 'momentum'
                ]
                params, factor_res = factor_neutral(
                    factor_series=alpha_date, neutral_frame=barra_risk_exposure)
                alpha_date = factor_res
                alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
                alpha_date = FactorPreProcess().standardization(alpha_date)

        alpha_exposure.ix[cur_cal_date, :] = alpha_date
        res = pd.concat([alpha_date, pct_date], axis=1)
        res.columns = ['alpha_val', 'period_pct']
        res = res.dropna()
        res = res.sort_values(by=['alpha_val'], ascending=False)
        labels = ["group_" + str(i) for i in list(range(1, group_number + 1))]
        # NOTE(review): pd.cut bins by VALUE range, not by equal-count
        # quantiles (pd.qcut) — confirm this is the intended grouping.
        res['group'] = pd.cut(res['alpha_val'], bins=group_number, labels=labels)
        # factor return proxy: mean of exposure-weighted period returns
        period_return = (res['alpha_val'] * res['period_pct']).mean()
        alpha_return.ix[cur_cal_date, "FactorReturn"] = period_return
        information_correlation = res['alpha_val'].corr(res['period_pct'])
        alpha_return.ix[cur_cal_date, "IC"] = information_correlation
        group_pct = res.groupby(by=['group'])['period_pct'].mean()
        for i_label in range(len(labels)):
            alpha_return.ix[cur_cal_date, labels[i_label]] = group_pct.values[i_label]

    alpha_return = alpha_return.dropna(subset=['FactorReturn'])
    alpha_return["CumFactorReturn"] = alpha_return['FactorReturn'].cumsum()
    cum_labels = ["Cum_" + str(x) for x in labels]
    alpha_return[cum_labels] = alpha_return[labels].cumsum()

    # plot (kept disabled)
    ###############################################################################################################
    ###############################################################################################################
    # plt_col = []
    # plt_col.append("CumFactorReturn")
    # plt_col.extend(cum_labels)
    # alpha_return[plt_col].plot()
    # plt.title(factor_name)
    # plt.show()

    # describe annual
    ###############################################################################################################
    ###############################################################################################################
    back_test_beg_date = Date().get_trade_date_offset(date_series[0], 1)
    back_test_end_date = Date().get_trade_date_offset(
        date_series[len(date_series) - 1], 1)
    back_test_days = Date().get_trade_date_diff(back_test_beg_date, back_test_end_date)
    backtest_year = back_test_days / year_trade_days

    # index dates are 'YYYYMMDD' strings
    alpha_return['year'] = alpha_return.index.map(
        lambda x: datetime.strptime(x, "%Y%m%d").year)
    year_factor_return = alpha_return.groupby(by=['year'])['FactorReturn'].sum()
    year_count = alpha_return.groupby(by=['year'])['FactorReturn'].count()
    year_ic_mean = alpha_return.groupby(by=['year'])['IC'].mean()
    year_ic_std = alpha_return.groupby(by=['year'])['IC'].std()
    year_gp_mean = alpha_return.groupby(by=['year'])[labels].mean()
    year_describe = pd.concat([
        year_factor_return, year_count, year_ic_mean, year_ic_std, year_gp_mean
    ], axis=1)
    col = ['YearFactorReturn', 'Count', 'IC_mean', 'IC_std']
    col.extend(labels)
    year_describe.columns = col
    year_describe['YearFactorReturn'] = year_describe[
        'YearFactorReturn'] / year_describe['Count'] * year_count
    # sqrt(50): presumably annualization for ~50 periods/year — TODO confirm
    year_describe['IC_IR'] = year_describe['IC_mean'] / year_describe[
        'IC_std'] * np.sqrt(50)
    year_describe.ix['Sum', 'YearFactorReturn'] = alpha_return[
        "CumFactorReturn"].values[-1] / backtest_year
    year_describe.ix['Sum', 'IC_IR'] = alpha_return["IC"].mean(
    ) / alpha_return["IC"].std() * np.sqrt(50)
    year_describe.ix['Sum', 'IC_mean'] = alpha_return["IC"].mean()
    year_describe.ix['Sum', 'IC_std'] = alpha_return["IC"].std()
    year_describe.ix['Sum', labels] = year_describe.ix[0:-1, labels].sum()
    year_describe.index = year_describe.index.map(str)

    # per-year correlation between group rank and group return (monotonicity)
    for i in range(len(year_describe)):
        year = year_describe.index[i]
        corr_pd = pd.DataFrame(year_describe.ix[year, labels].values,
                               index=labels,
                               columns=['group_return'])
        corr_pd['group_number'] = (list(range(1, group_number + 1)))
        year_describe.ix[year, 'Group_Corr'] = corr_pd.corr().ix[0, 1]

    # save data
    ###############################################################################################################
    ###############################################################################################################

    # alpha_exposure_neutral
    ###############################################################################################################
    alpha_exposure = alpha_exposure.astype(np.float)
    filename = os.path.join(out_path, 'alpha_exposure_neutral',
                            factor_name + "_FactorExposureNeutral.csv")
    alpha_exposure.T.to_csv(filename)

    # exposure_corr: cross-sectional correlation of adjacent exposures
    ###############################################################################################################
    exposure_corr = pd.DataFrame([],
                                 index=alpha_exposure.index,
                                 columns=['Exposure_Corr'])
    for i_date in range(1, len(alpha_exposure.index)):
        last_exposure_date = alpha_exposure.index[i_date - 1]
        cur_exposure_date = alpha_exposure.index[i_date]
        exposure_adjoin = alpha_exposure.ix[
            last_exposure_date:cur_exposure_date, :]
        exposure_adjoin = exposure_adjoin.T.dropna()
        exposure_corr.ix[cur_exposure_date,
                         'Exposure_Corr'] = exposure_adjoin.corr().ix[0, 1]
    exposure_corr = exposure_corr.dropna()
    exposure_corr.ix['Mean', 'Exposure_Corr'] = exposure_corr['Exposure_Corr'].mean()
    filename = os.path.join(out_path, 'alpha_exposure_stability',
                            factor_name + "_FactorExposureCorr.csv")
    exposure_corr.to_csv(filename)

    # Factor Return workbook
    ###############################################################################################################
    filename = os.path.join(out_path, 'alpha_return',
                            factor_name + "_FactorReturn.xlsx")
    sheet_name = "FactorReturn"
    we = WriteExcel(filename)
    ws = we.add_worksheet(sheet_name)
    num_format_pd = pd.DataFrame([], columns=year_describe.columns,
                                 index=['format'])
    num_format_pd.ix['format', :] = '0.00%'
    num_format_pd.ix['format', ['Count', 'IC_IR']] = '0.00'
    we.write_pandas(year_describe,
                    ws,
                    begin_row_number=0,
                    begin_col_number=1,
                    num_format_pd=num_format_pd,
                    color="blue",
                    fillna=True)

    num_format_pd = pd.DataFrame([], columns=alpha_return.columns,
                                 index=['format'])
    num_format_pd.ix['format', :] = '0.00%'
    num_format_pd.ix['format', ['year']] = '0'
    we.write_pandas(alpha_return,
                    ws,
                    begin_row_number=0,
                    begin_col_number=2 + len(year_describe.columns),
                    num_format_pd=num_format_pd,
                    color="blue",
                    fillna=True)
    we.close()
def cal_factor_alpha_return(factor_name, beg_date, end_date, cal_period):
    """
    Backtest one Barra RISK factor: per-period factor returns, IC
    (including |IC|), and group returns; writes exposure-stability and
    FactorReturn reports under the risk-model output path. Returns None.

    NOTE(review): this file defines cal_factor_alpha_return TWICE; this
    later definition shadows the earlier alpha-model version. It reads
    from 'barra_risk_dfc' and prints "Risk Return", so it was presumably
    meant to be named cal_factor_risk_return — confirm with callers
    before renaming.
    """
    # param
    ###############################################################################################################
    ###############################################################################################################
    group_number = 5
    year_trade_days = 242
    out_path = 'E:\\3_Data\\5_stock_data\\2_risk_model\\1_barra_risk_model\\'
    alpha_remove_extreme_value = True  # winsorize the factor (MAD)
    alpha_standard = True  # standardize the factor

    # read data
    ###############################################################################################################
    ###############################################################################################################
    # NOTE(review): price path "primary_dfc" differs from the alpha version's
    # "alpha_dfc" — confirm which store holds PriceCloseAdjust.
    price = Stock().get_factor_h5("PriceCloseAdjust", None, "primary_dfc")
    risk_val = Stock().get_factor_h5(factor_name, None, "barra_risk_dfc")
    beg_date = max(beg_date, price.columns[0], risk_val.columns[0])
    end_date = min(end_date, price.columns[-1], risk_val.columns[-1])
    date_series = Date().get_trade_date_series(beg_date, end_date, period=cal_period)

    # pre process data
    ###############################################################################################################
    ###############################################################################################################
    if alpha_remove_extreme_value:
        risk_val = FactorPreProcess().remove_extreme_value_mad(risk_val)
    if alpha_standard:
        risk_val = FactorPreProcess().standardization(risk_val)

    # cal everyday
    ###############################################################################################################
    ###############################################################################################################
    risk_return = pd.DataFrame([], index=date_series)
    risk_exposure = pd.DataFrame([], index=date_series, columns=price.index)

    # the last two dates are skipped: each period needs a next rebalance
    # date plus one more day to price the sell
    for i_date in range(len(date_series) - 2):
        cur_cal_date = date_series[i_date]
        next_cal_date = date_series[i_date + 1]
        # trade one day after each calculation date
        buy_date = Date().get_trade_date_offset(cur_cal_date, 1)
        sell_date = Date().get_trade_date_offset(next_cal_date, 1)
        print(" Calculating Factor %s Risk Return At %s" % (factor_name, cur_cal_date))
        risk_return.index.name = 'CalDate'
        risk_return.ix[cur_cal_date, "BuyDate"] = buy_date
        risk_return.ix[cur_cal_date, "SellDate"] = sell_date
        risk_date = risk_val[cur_cal_date]
        buy_price = price[buy_date]
        sell_price = price[sell_date]
        pct_date = sell_price / buy_price - 1.0
        # per-date re-winsorize / re-standardize the cross-section
        risk_date = FactorPreProcess().remove_extreme_value_mad(risk_date)
        risk_date = FactorPreProcess().standardization(risk_date)
        risk_exposure.ix[cur_cal_date, :] = risk_date
        res = pd.concat([risk_date, pct_date], axis=1)
        res.columns = ['risk_val', 'period_pct']
        res = res.dropna()
        res = res.sort_values(by=['risk_val'], ascending=False)
        labels = ["group_" + str(i) for i in list(range(1, group_number + 1))]
        # NOTE(review): pd.cut bins by VALUE range, not equal-count quantiles
        res['group'] = pd.cut(res['risk_val'], bins=group_number, labels=labels)
        # factor return proxy: mean of exposure-weighted period returns
        period_return = (res['risk_val'] * res['period_pct']).mean()
        risk_return.ix[cur_cal_date, "FactorReturn"] = period_return
        information_correlation = res['risk_val'].corr(res['period_pct'])
        risk_return.ix[cur_cal_date, "IC"] = information_correlation
        group_pct = res.groupby(by=['group'])['period_pct'].mean()
        for i_label in range(len(labels)):
            risk_return.ix[cur_cal_date, labels[i_label]] = group_pct.values[i_label]

    risk_return = risk_return.dropna(subset=['FactorReturn'])
    risk_return["CumFactorReturn"] = risk_return['FactorReturn'].cumsum()
    cum_labels = ["Cum_" + str(x) for x in labels]
    risk_return[cum_labels] = risk_return[labels].cumsum()

    # plot
    ###############################################################################################################
    ###############################################################################################################
    plt_col = []
    plt_col.append("CumFactorReturn")
    plt_col.extend(cum_labels)
    risk_return[plt_col].plot()
    plt.show()

    # describe annual
    ###############################################################################################################
    ###############################################################################################################
    back_test_beg_date = Date().get_trade_date_offset(date_series[0], 1)
    back_test_end_date = Date().get_trade_date_offset(
        date_series[len(date_series) - 1], 1)
    back_test_days = Date().get_trade_date_diff(back_test_beg_date, back_test_end_date)
    backtest_year = back_test_days / year_trade_days
    # |IC| tracks explanatory power regardless of sign (risk, not alpha)
    risk_return['IC_abs'] = risk_return['IC'].abs()
    # index dates are 'YYYYMMDD' strings
    risk_return['year'] = risk_return.index.map(
        lambda x: datetime.strptime(x, "%Y%m%d").year)
    year_factor_return = risk_return.groupby(by=['year'])['FactorReturn'].sum()
    year_count = risk_return.groupby(by=['year'])['FactorReturn'].count()
    year_ic_mean = risk_return.groupby(by=['year'])['IC'].mean()
    year_ic_abs_mean = risk_return.groupby(by=['year'])['IC_abs'].mean()
    year_ic_std = risk_return.groupby(by=['year'])['IC'].std()
    year_gp_mean = risk_return.groupby(by=['year'])[labels].mean()
    year_describe = pd.concat([
        year_factor_return, year_count, year_ic_mean, year_ic_abs_mean,
        year_ic_std, year_gp_mean
    ], axis=1)
    col = ['YearFactorReturn', 'Count', 'IC_mean', 'IC_abs_mean', 'IC_std']
    col.extend(labels)
    year_describe.columns = col
    year_describe['YearFactorReturn'] = year_describe[
        'YearFactorReturn'] / year_describe['Count'] * year_count
    # sqrt(50): presumably annualization for ~50 periods/year — TODO confirm
    year_describe['IC_IR'] = year_describe['IC_mean'] / year_describe[
        'IC_std'] * np.sqrt(50)
    year_describe.ix['Sum', 'YearFactorReturn'] = risk_return[
        "CumFactorReturn"].values[-1] / backtest_year
    year_describe.ix['Sum', 'IC_IR'] = risk_return["IC"].mean(
    ) / risk_return["IC"].std() * np.sqrt(50)
    year_describe.ix['Sum', 'IC_mean'] = risk_return["IC"].mean()
    year_describe.ix['Sum', 'IC_abs_mean'] = risk_return["IC"].abs().mean()
    year_describe.ix['Sum', 'IC_std'] = risk_return["IC"].std()
    year_describe.ix['Sum', labels] = year_describe.ix[0:-1, labels].sum()
    year_describe.index = year_describe.index.map(str)

    # per-year correlation between group rank and group return (monotonicity)
    for i in range(len(year_describe)):
        year = year_describe.index[i]
        corr_pd = pd.DataFrame(year_describe.ix[year, labels].values,
                               index=labels,
                               columns=['group_return'])
        corr_pd['group_number'] = (list(range(1, group_number + 1)))
        year_describe.ix[year, 'Group_Corr'] = corr_pd.corr().ix[0, 1]

    # save data
    ###############################################################################################################
    ###############################################################################################################

    # exposure_corr: cross-sectional correlation of adjacent exposures
    ###############################################################################################################
    # NOTE(review): np.float is deprecated in modern numpy — float works
    risk_exposure = risk_exposure.astype(np.float)
    exposure_corr = pd.DataFrame([],
                                 index=risk_exposure.index,
                                 columns=['Exposure_Corr'])
    for i_date in range(1, len(risk_exposure.index)):
        last_exposure_date = risk_exposure.index[i_date - 1]
        cur_exposure_date = risk_exposure.index[i_date]
        exposure_adjoin = risk_exposure.ix[
            last_exposure_date:cur_exposure_date, :]
        exposure_adjoin = exposure_adjoin.T.dropna()
        exposure_corr.ix[cur_exposure_date,
                         'Exposure_Corr'] = exposure_adjoin.corr().ix[0, 1]
    exposure_corr = exposure_corr.dropna()
    exposure_corr.ix['Mean', 'Exposure_Corr'] = exposure_corr['Exposure_Corr'].mean()
    filename = os.path.join(out_path, 'risk_exposure_stability',
                            factor_name + "_FactorExposureCorr.csv")
    exposure_corr.to_csv(filename)

    # Factor Return workbook
    ###############################################################################################################
    filename = os.path.join(out_path, 'risk_return',
                            factor_name + "_FactorReturn.xlsx")
    sheet_name = "FactorReturn"
    we = WriteExcel(filename)
    ws = we.add_worksheet(sheet_name)
    num_format_pd = pd.DataFrame([], columns=year_describe.columns,
                                 index=['format'])
    num_format_pd.ix['format', :] = '0.00%'
    num_format_pd.ix['format', ['Count', 'IC_IR']] = '0.00'
    we.write_pandas(year_describe,
                    ws,
                    begin_row_number=0,
                    begin_col_number=1,
                    num_format_pd=num_format_pd,
                    color="blue",
                    fillna=True)
    num_format_pd = pd.DataFrame([], columns=risk_return.columns,
                                 index=['format'])
    num_format_pd.ix['format', :] = '0.00%'
    num_format_pd.ix['format', ['year']] = '0'
    we.write_pandas(risk_return,
                    ws,
                    begin_row_number=0,
                    begin_col_number=2 + len(year_describe.columns),
                    num_format_pd=num_format_pd,
                    color="blue",
                    fillna=True)
    we.close()
def cal_factor_liquidity(beg_date, end_date):
    """
    Barra CNE5 LIQUIDITY factor.

    LIQUIDITY_STOM: log of daily turnover summed over ~1 month (21 days).
    LIQUIDITY_STOQ: log of the average monthly turnover over ~1 quarter.
    LIQUIDITY_STOA: log of the average monthly turnover over ~1 year.
    LIQUIDITY = 0.35*STOM + 0.35*STOQ + 0.30*STOA, then orthogonalized
    against the SIZE factor (per-date OLS residual), winsorized and
    standardized.

    Writes STOQ/STOA (raw and normal) and the final factor to the
    'barra_risk_dfc' HDF5 store and returns the final factor.
    """
    # params
    ##################################################################################
    factor_name = "NORMAL_CNE5_LIQUIDITY"
    # BUG FIX: M and Q were undefined in the original (NameError at runtime);
    # values follow the file's own STOM window (L2: M = 21) and the divisors
    # used below (quarter sum / 3 months, year sum / 12 months).
    M = 21   # one trading month
    Q = 63   # one trading quarter (3 months) -- TODO confirm against spec
    A = 252  # one trading year (12 months)
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    # BUG FIX: removed a dangling, incomplete "beg_date =" assignment that
    # made the original function a syntax error.

    # read data / rolling turnover windows
    ##################################################################################
    turnover_daily = Stock().get_factor_h5("TurnOver_Daily", None, 'primary_mfc').T
    turnover_month = turnover_daily.rolling(window=M).sum().applymap(np.log)
    turnover_quarter = (turnover_daily.rolling(window=Q).sum() / 3.0).applymap(np.log)
    turnover_yearly = (turnover_daily.rolling(window=A).sum() / 12.0).applymap(np.log)
    # BUG FIX: the monthly leg must be transposed to the same
    # (stocks x dates) orientation as the quarterly/yearly legs before the
    # weighted sum below; the original left it as (dates x stocks).
    turnover_month = turnover_month.dropna(how='all').T
    turnover_quarter = turnover_quarter.dropna(how='all').T
    turnover_yearly = turnover_yearly.dropna(how='all').T

    Stock().write_factor_h5(turnover_quarter, "RAW_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "RAW_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    # winsorize + standardize each leg; the monthly leg now gets the same
    # treatment as the other two so the weighted combination is in
    # consistent units (the original mixed a raw leg with normalized legs)
    # -- TODO confirm against the model spec.
    turnover_month = FactorPreProcess().remove_extreme_value_mad(turnover_month)
    turnover_month = FactorPreProcess().standardization_free_mv(turnover_month)
    turnover_quarter = FactorPreProcess().remove_extreme_value_mad(turnover_quarter)
    turnover_quarter = FactorPreProcess().standardization_free_mv(turnover_quarter)
    turnover_yearly = FactorPreProcess().remove_extreme_value_mad(turnover_yearly)
    turnover_yearly = FactorPreProcess().standardization_free_mv(turnover_yearly)
    Stock().write_factor_h5(turnover_quarter, "NORMAL_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "NORMAL_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    turnover = 0.35 * turnover_month + 0.35 * turnover_quarter + 0.3 * turnover_yearly
    turnover = turnover.T.dropna(how='all').T

    # orthogonalize against SIZE: per-date OLS residual of liquidity on size
    ##################################################################################
    size_data = Stock().get_factor_h5("NORMAL_CNE5_SIZE", None, 'barra_risk_dfc')
    [size_data, turnover] = FactorPreProcess().make_same_index_columns([size_data, turnover])
    turnover_res = pd.DataFrame([], index=turnover.index, columns=turnover.columns)

    for i_index in range(len(turnover.columns)):
        date = turnover.columns[i_index]
        print('Calculating Barra Risk factor %s at date %s' % (factor_name, date))
        regression_data = pd.concat([size_data[date], turnover[date]], axis=1)
        regression_data.columns = ['x', 'y']
        regression_data = regression_data.dropna()
        y = regression_data['y'].values
        x = regression_data['x'].values
        x_add = sm.add_constant(x)
        model = sm.OLS(y, x_add).fit()
        regression_data['res'] = regression_data['y'] - model.fittedvalues
        turnover_res[date] = regression_data['res']

    turnover_res = FactorPreProcess().remove_extreme_value_mad(turnover_res)
    turnover_res = FactorPreProcess().standardization_free_mv(turnover_res)
    Stock().write_factor_h5(turnover_res, factor_name, 'barra_risk_dfc')
    return turnover_res
def __init__(self):
    """Initialize both mixin parents of this class."""
    # Explicit parent-class initialization: this class combines the HDF5
    # factor read/write interface with the pre-processing helpers, and
    # both parents are initialized in this order.
    # NOTE(review): cooperative super().__init__() would be preferable if
    # the parent classes support it — confirm before changing.
    StockFactorReadWrite.__init__(self)
    FactorPreProcess.__init__(self)