def get_implicit_factor_return(date):
    """Estimate implicit factor returns on a filtered whole-market universe.

    The universe is taken as of the trading day before the latest trading
    date and is narrowed in three steps: recently listed stocks, ST stocks
    and stocks with zero traded volume are removed.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :return: the result of ``factor_return_estimation`` for the latest
        trading date and the exposures of the filtered universe.
    """
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = str(rqdatac.get_previous_trading_date(day_after))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Stocks already listed on the previous trading day, so that a daily
    # return can be computed for every one of them.
    listed_instruments = rqdatac.all_instruments(
        type='CS', date=previous_trading_date)
    candidates = listed_instruments['order_book_id'].tolist()

    # Drop stocks listed after the date 21 trading days back.
    listing_cutoff = str(
        rqdatac.get_previous_trading_date(latest_trading_date,
                                          country='cn',
                                          n=21))
    seasoned = []
    for order_book_id in candidates:
        if rqdatac.instruments(order_book_id).listed_date <= listing_cutoff:
            seasoned.append(order_book_id)

    # Drop ST stocks.
    st_flags = rqdatac.is_st_stock(seasoned,
                                   start_date=previous_trading_date,
                                   end_date=previous_trading_date)
    st_flags.index = st_flags.index.astype(str)
    st_on_day = st_flags.loc[previous_trading_date]
    non_st = st_on_day[st_on_day.values == False].index.tolist()

    # Drop suspended stocks (no traded volume on the previous trading day).
    volume = rqdatac.get_price(non_st,
                               start_date=previous_trading_date,
                               end_date=previous_trading_date,
                               frequency='1d',
                               fields='volume',
                               country='cn')
    volume_on_day = volume.loc[previous_trading_date]
    tradable = volume_on_day[volume_on_day.values > 0].index.tolist()

    # Exposures are measured on the previous trading day; the factor returns
    # are estimated for the latest trading day.
    exposure = get_exposure(tradable, previous_trading_date)
    return factor_return_estimation(latest_trading_date, exposure)
def data_process(order_book_ids, equity_type, start_date):
    """Fetch a 132-trading-day price window and drop unusable assets.

    :param order_book_ids: list of str, candidate assets.
    :param equity_type: str, ``'funds'`` or ``'stocks'``.
    :param start_date: date the portfolio is built on; the sample ends on
        the trading day before it and starts 133 trading days before it.
    :return: tuple ``(clean_period_prices, final_kickout_list)`` - the close
        prices of the surviving assets and the list of removed assets.
    :raises ValueError: if ``equity_type`` is neither ``'funds'`` nor
        ``'stocks'``.
    """
    windows = 132
    end_date = rqdatac.get_previous_trading_date(start_date)
    # Walk back one trading day at a time to reach the start of the window.
    for _ in range(windows + 1):
        start_date = rqdatac.get_previous_trading_date(start_date)

    # Compare by value: identity (`is`) on string literals only works by
    # accident of interning.
    if equity_type == 'funds':
        period_data = rqdatac.fund.get_nav(order_book_ids,
                                           start_date,
                                           end_date,
                                           fields='acc_net_value')
    elif equity_type == 'stocks':
        period_data = rqdatac.get_price(order_book_ids,
                                        start_date,
                                        end_date,
                                        frequency='1d',
                                        fields=['close', 'volume'])
    else:
        raise ValueError(
            "equity_type must be 'funds' or 'stocks', got %r" % (equity_type,))

    # NOTE(review): the fund NAV result is indexed with 'close'/'volume' just
    # like the stock result - confirm that the 'funds' path really provides
    # those keys.
    period_prices = period_data['close']
    period_volume = period_data['volume']

    # Set up the threshold of elimination
    out_threshold = ceil(period_prices.shape[0] / 2)
    kickout_list = []
    suspended_list = []
    # Default sample start; previously this name was only bound inside one
    # branch of the loop below and was unbound whenever that branch never ran.
    reset_start_date = start_date

    # Locate the first valid value of each column. Assets whose available
    # history is shorter than the threshold are kicked out; an asset whose
    # first valid date precedes start_date moves reset_start_date; assets that
    # stopped trading before end_date, or whose most frequent volume value
    # occurs at least out_threshold times, are treated as suspended/delisted.
    for i in order_book_ids:
        volume_counts = period_volume.loc[:, i].value_counts()
        if volume_counts.empty:
            kickout_list.append(i)
            continue
        first_valid = period_prices.loc[:, i].first_valid_index()
        if ((end_date - first_valid) / np.timedelta64(1, 'D')) \
                < out_threshold:
            kickout_list.append(i)
        elif first_valid < start_date:
            reset_start_date = first_valid
        elif period_volume.loc[:, i].last_valid_index() < end_date or \
                volume_counts.iloc[0] >= out_threshold:
            suspended_list.append(i)

    # Check whether any ST stocks are included and generate a list for them
    st_list = list(period_prices.columns.values[rqdatac.is_st_stock(
        order_book_ids, reset_start_date, end_date).sum(axis=0) > 0])

    # Final kickout list: union of all the reasons above
    final_kickout_list = list(set().union(kickout_list, st_list,
                                          suspended_list))

    # Generate clean data
    clean_order_book_ids = list(set(order_book_ids) - set(final_kickout_list))
    clean_period_prices = period_prices.loc[reset_start_date:end_date,
                                            clean_order_book_ids]
    return clean_period_prices, final_kickout_list
def get_implicit_factor_return(date):
    """Estimate implicit factor returns under four exposure definitions.

    ``get_exposure`` supplies four exposure sets for the whole market as of
    the previous trading day; each one is paired with the industry labels it
    uses (Shenwan names or CNE5S industry factors) and passed to
    ``factor_return_estimation`` together with ``date``.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :return: tuple of four factor-return results, in the order RQ exposure,
        RQ style with Barra industries, Barra exposure, Barra style with RQ
        industries.
    """
    next_day = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = str(rqdatac.get_previous_trading_date(next_day))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Stocks already listed on the previous trading day, so that a daily
    # return can be computed.
    instruments = rqdatac.all_instruments(type='CS',
                                          date=previous_trading_date)
    stock_list = instruments['order_book_id'].values.tolist()

    # Whole-market exposures on the previous trading day.
    exposures = get_exposure(stock_list, previous_trading_date)
    (rq_exposure, rq_style_barra_industry, barra_exposure,
     barra_style_rq_industry_exposure) = exposures

    shenwan_industry_name = [
        '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
        '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
        '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
        '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
        '汽车', '机械设备',
    ]
    industry_factors = [
        'CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN',
        'CNE5S_MATERIAL', 'CNE5S_AERODEF', 'CNE5S_BLDPROD', 'CNE5S_CNSTENG',
        'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH', 'CNE5S_TRDDIST',
        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN',
        'CNE5S_AUTO', 'CNE5S_HOUSEDUR', 'CNE5S_LEISLUX', 'CNE5S_CONSSERV',
        'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',
        'CNE5S_FOODPROD', 'CNE5S_HEALTH', 'CNE5S_BANKS', 'CNE5S_DVFININS',
        'CNE5S_REALEST', 'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE',
    ]

    # One estimation per exposure set, each with its matching industry labels.
    rq_factor_returns = factor_return_estimation(
        date, rq_exposure, shenwan_industry_name)
    rq_style_barra_industry_factor_returns = factor_return_estimation(
        date, rq_style_barra_industry, industry_factors)
    barra_factor_returns = factor_return_estimation(
        date, barra_exposure, industry_factors)
    barra_style_rq_industry_factor_returns = factor_return_estimation(
        date, barra_style_rq_industry_exposure, shenwan_industry_name)

    return (rq_factor_returns, rq_style_barra_industry_factor_returns,
            barra_factor_returns, barra_style_rq_industry_factor_returns)
def pure_factor_return(date):
    """Estimate pure factor returns from GICS-classified exposures.

    Exposures for the whole market on the previous trading day are reordered
    so that style factors come first, industry factors second and the country
    factor last, then handed to ``factor_return_estimation``.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :return: the result of ``factor_return_estimation``.
    """
    style_factors = [
        'CNE5S_BETA', 'CNE5S_MOMENTUM', 'CNE5S_SIZE', 'CNE5S_EARNYILD',
        'CNE5S_RESVOL', 'CNE5S_GROWTH', 'CNE5S_BTOP', 'CNE5S_LEVERAGE',
        'CNE5S_LIQUIDTY', 'CNE5S_SIZENL',
    ]
    industry_factors = [
        'CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN',
        'CNE5S_MATERIAL', 'CNE5S_AERODEF', 'CNE5S_BLDPROD', 'CNE5S_CNSTENG',
        'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH', 'CNE5S_TRDDIST',
        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN',
        'CNE5S_AUTO', 'CNE5S_HOUSEDUR', 'CNE5S_LEISLUX', 'CNE5S_CONSSERV',
        'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',
        'CNE5S_FOODPROD', 'CNE5S_HEALTH', 'CNE5S_BANKS', 'CNE5S_DVFININS',
        'CNE5S_REALEST', 'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE',
    ]

    shifted = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = str(rqdatac.get_previous_trading_date(shifted))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Stocks already listed on the previous trading day, so that a daily
    # return can be computed.
    market = rqdatac.all_instruments(type='CS', date=previous_trading_date)
    stock_list = market['order_book_id'].values.tolist()

    # Whole-market exposures on the previous trading day.
    factor_exposure = get_exposure(stock_list,
                                   previous_trading_date,
                                   industry_classification='GICS')

    # Reorder the columns: style factors, then industries, then country.
    ordered_blocks = [
        factor_exposure[style_factors],
        factor_exposure[industry_factors],
        factor_exposure['CNE5S_COUNTRY'],
    ]
    reorganized_factor_exposure = pd.concat(ordered_blocks, axis=1)

    return factor_return_estimation(
        stock_list, date=date, factor_exposure=reorganized_factor_exposure)
def prep_download(self):
    """Download 1-minute and daily bars for every futures product in cf_list.csv.

    For each CSV row the dominant contract of the latest trading day is
    compared with that of the previous trading day. If it is unchanged only
    the data since the previous trading day is requested; if it changed (or
    ``self.download_full`` is set) the download starts at 2011-01-01.
    Row column 1 is used for the dominant-contract lookup and column 0 is
    passed to ``self.rq_download``.
    """
    # self.start = datetime(2011, 1, 1)
    self.end = datetime.now().date()

    # Read the list of commodity futures.
    with open('cf_list.csv', 'r') as csv_file:
        cf_list = list(csv.reader(csv_file))

    rq.init()
    today = rq.get_latest_trading_date()
    prev_trading_day = rq.get_previous_trading_date(today, 1, market='cn')

    for instrument in cf_list:
        # Has the dominant contract rolled on the latest trading day?
        today_dominant = rq.futures.get_dominant(instrument[1], today)
        prev_day_dominant = rq.futures.get_dominant(instrument[1],
                                                    prev_trading_day)
        current_contract = today_dominant[today]
        previous_contract = prev_day_dominant[prev_trading_day]

        if current_contract == previous_contract:
            self.start = prev_trading_day
        elif current_contract != previous_contract:
            self.start = datetime(2011, 1, 1)
            print(
                f"品种 {instrument[1]} 的主力合约即日起从 {prev_day_dominant[prev_trading_day]} 换到 {today_dominant[today]}"
            )
        else:
            # Neither equal nor unequal: the comparison itself misbehaved.
            print("####### 检查主力合约过程中出错,请手动debug!")

        # An explicit full download overrides the incremental start date.
        if self.download_full == True:
            self.start = datetime(2011, 1, 1)

        for frequency in ('1m', 'd'):
            self.rq_download(instrument[0], frequency, self.start, self.end)
def get_factor_covariance(date, parameters):
    """Build the factor covariance matrix through three successive adjustments.

    The chain is Newey-West adjustment -> eigenfactor risk adjustment ->
    volatility regime adjustment; the last result is returned.

    NOTE(review): ``current_factor_return`` and ``all_factors`` are used below
    but never assigned in this function - presumably module-level names;
    confirm, otherwise this raises NameError.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :param parameters: passed through to the multi-period return builder and
        to the adjustment helpers.
    :return: the value returned by ``volatility_regime_adjustment``.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)))
    multiperiod_factor_returns = get_multiperiod_factor_returns(
        latest_trading_date, parameters)
    # NOTE(review): result discarded, and called with two arguments here but
    # with four further down - looks like a leftover call; confirm.
    Newey_West_adjustment(multiperiod_factor_returns, parameters)
    # Empirical covariance matrix, annualised (multiplied by 252).
    # Neither of the two derived frames below is used later in this function.
    empirical_factor_covariance = current_factor_return.cov().stack() * 252
    empirical_factor_covariance.index.names = ['factor', '_factor']
    reformatted_empirical_factor_covariance = empirical_factor_covariance.reset_index(
    )
    # Only the first two of the four results are consumed below.
    Newey_West_adjustment_cov, factor_volitality, correlation_matrix, estimated_cov = Newey_West_adjustment(
        current_factor_return, multiperiod_factor_returns, all_factors,
        parameters)
    eigenfactor_risk_adjustment_cov = eigenfactor_risk_adjustment(
        Newey_West_adjustment_cov, factor_volitality, all_factors)
    volatility_regime_adjustment_cov = volatility_regime_adjustment(
        eigenfactor_risk_adjustment_cov, current_factor_return, parameters)
    return volatility_regime_adjustment_cov
def get_explicit_factor_returns(date):
    """Compute explicit (long-short) style factor returns for four stock pools.

    For every pool the stocks are bucketed by size and beta, each bucket is
    split at the median of the factor exposure, and the factor return is the
    mean one-day price change of the above-median stocks minus that of the
    at-or-below-median stocks.

    :param date: date; exposures and prices are taken as of the trading day
        before it.
    :return: pandas.DataFrame with columns ``whole_market``, ``csi_300``,
        ``csi_500`` and ``csi_800`` (one row per factor exposure column).
    """
    previous_trading_date = rqdatac.get_previous_trading_date(date)
    all_a_stocks = rqdatac.all_instruments(type="CS",date=previous_trading_date).order_book_id.tolist()
    filtered_stocks = noisy_stocks_filter(all_a_stocks,previous_trading_date)
    # print(all_a_stocks,previous_trading_date)
    factor_exposures = rqdatac.get_style_factor_exposure(all_a_stocks, previous_trading_date, previous_trading_date, "all").sort_index()
    # Drop the second index level so rows are addressed by the first level
    # only (presumably the order_book_id - TODO confirm).
    factor_exposures.index=factor_exposures.index.droplevel(1)
    # Two consecutive closes give the one-day change ending on previous_trading_date.
    closePrice = rqdatac.get_price(all_a_stocks, rqdatac.get_previous_trading_date(previous_trading_date), previous_trading_date, fields="close")
    priceChange = closePrice.pct_change().iloc[-1]
    index_mapping = {"csi_300":'000300.XSHG',"csi_500":"000905.XSHG","csi_800":"000906.XSHG"}
    # Stock pools: three index constituent lists plus the filtered whole market.
    all_stocks = {index:rqdatac.index_components(index_mapping.get(index),date=previous_trading_date) for index in index_mapping}
    all_stocks['whole_market'] = filtered_stocks

    def _calc_explicitReturns_with_stocksList(stocksList):
        # Compute the factor returns for one stock pool.
        _sizeBeta = factor_exposures[['size','beta']].loc[stocksList]
        # Cut size and beta into 3 equal-width bins each, labelled 1..3.
        _quantileGroup = _sizeBeta.apply(lambda x:pd.cut(x,bins=3,labels=False)+1).reset_index()
        # Positional row numbers of the stocks in every (size, beta) bucket.
        _quantileStocks = _quantileGroup.groupby(['size','beta']).apply(lambda x:x.index.tolist())
        # Map the row numbers back to stock ids: one list of ids per bucket.
        market_neutralize_stocks = _quantileStocks.apply(
            lambda x: pd.Series(stocksList).loc[x].values.tolist()).values.tolist()
        # Apply the single-factor calculation to every exposure column.
        return factor_exposures.loc[stocksList].apply(lambda x,y=market_neutralize_stocks:_calc_single_explicit_returns(x,y))

    def _calc_single_explicit_returns(_factor_exposure,market_neutralize_stocks):
        # Compute the return of a single factor.
        def _deuce(series):
            # Split at the median: [at-or-below-median ids, above-median ids].
            median = series.median()
            return [series[series<=median].index.tolist(),series[series>median].index.tolist()]
        # Flattened so that even positions hold the low halves and odd
        # positions the high halves of the buckets.
        # NOTE(review): relies on NumPy building an object array from lists of
        # unequal length - confirm this works on the NumPy version in use.
        deuceResults = np.array([_deuce(_factor_exposure[neutralized_stks]) for neutralized_stks in market_neutralize_stocks]).flatten()
        # Union of all low halves (short leg) and of all high halves (long leg).
        short_stocksList = list(reduce(lambda x,y:set(x)|set(y),np.array([s for i,s in
                                                                             enumerate(deuceResults) if i%2==0])))
        long_stockList = list(reduce(lambda x,y:set(x)|set(y),np.array([s for i,s in enumerate(deuceResults) if i%2==1])))
        return priceChange[long_stockList].mean() - priceChange[short_stocksList].mean()

    results = {key: _calc_explicitReturns_with_stocksList(all_stocks.get(key)) for key in all_stocks}
    return pd.DataFrame(results)[['whole_market','csi_300','csi_500','csi_800']]
def get_previous_trading_days_customized(n):
    """Yield the ``n`` most recent trading days, newest first.

    Today is included (and counted) when it is itself a trading day; the
    remaining days are obtained by stepping back one trading day at a time.

    :param n: int, number of trading days to yield. Values ``<= 0`` yield
        nothing.
    :return: generator of dates.
    """
    # Guard first: without it a request for zero days still yielded today
    # whenever today is a trading day.
    if n <= 0:
        return
    today = dt.datetime.now().date()
    # A one-element range [today, today] means today is a trading day.
    if len(rqdatac.get_trading_dates(today, today)) == 1:
        n -= 1
        yield today
    while n > 0:
        today = rqdatac.get_previous_trading_date(today)
        yield today
        n -= 1
def get_daily_excess_return(stock_list, start_date, end_date):
    """Compute daily excess returns for stocks and for the market portfolio.

    :param stock_list: list of str, order_book_ids.
    :param start_date: first date of the return sample.
    :param end_date: last date of the return sample.
    :return: tuple ``(daily_excess_return, market_portfolio_daily_excess_return)``;
        the first is a DataFrame (dates x stocks) restricted to stocks with a
        complete return history, the second is the excess return of the
        market portfolio '000985.XSHG'.
    """
    # One extra trading day in front so the first return can be computed.
    price_start = rqdatac.get_previous_trading_date(start_date)

    # Close prices are forward-filled so that missing prices (for example
    # after a delisting) produce a zero daily return instead of NaN.
    # ``.ffill()`` replaces the deprecated ``fillna(method='ffill')``.
    close_prices = rqdatac.get_price(stock_list,
                                     price_start,
                                     end_date,
                                     frequency='1d',
                                     fields='close')
    stock_daily_return = close_prices.ffill().pct_change()[1:]

    # Drop every stock that still has a missing return.
    filtered_stock_daily_return = stock_daily_return.dropna(axis=1)

    # '000985.XSHG' is used as the market portfolio (chosen empirically
    # according to the original author).
    market_portfolio_daily_return = rqdatac.get_price(
        '000985.XSHG',
        price_start,
        end_date,
        frequency='1d',
        fields='close').pct_change()[1:]

    # Convert the 3M yield into a daily rate by compounding over 365 days.
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=start_date, end_date=end_date, tenor='3M')
    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[filtered_stock_daily_return.index]
    daily_risk_free = risk_free_return.iloc[:, 0]

    # Subtract the risk-free rate date by date (row-wise alignment).
    daily_excess_return = filtered_stock_daily_return.subtract(
        daily_risk_free, axis=0)
    market_portfolio_daily_excess_return = (
        market_portfolio_daily_return.subtract(daily_risk_free))

    return daily_excess_return, market_portfolio_daily_excess_return
def volatility_regime_adjustment(factor_covariance, factor_return, date,
                                 parameters):
    """Scale a factor covariance matrix by the volatility-regime multiplier.

    The multiplier is the exponentially weighted average of the
    cross-sectional bias statistic, computed over the last
    ``4 * VRA_half_life`` trading days from previously stored covariance
    forecasts.

    :param factor_covariance: DataFrame, covariance matrix to adjust; its
        index labels the factors.
    :param factor_return: DataFrame of daily factor returns indexed by date.
    :param date: date the adjustment is made for.
    :param parameters: dict; reads ``'VRA_half_life'`` and
        ``'NeweyWest_volatility_lags'``.
    :return: ``factor_covariance`` multiplied by the squared multiplier.
    """
    half_life = parameters['VRA_half_life']
    # Four times the weight half-life is used as the sample size.
    sample_size = 4 * half_life

    # Calendar look-back that is generous enough to contain sample_size
    # trading days; the exact sample is cut from the tail below.
    start_date = pd.Timestamp(date) - relativedelta(
        months=np.ceil(sample_size / 252 * (12 * 3 / 2)))
    end_date = rqdatac.get_previous_trading_date(date)
    trading_dates = rqdatac.get_trading_dates(start_date,
                                              end_date)[-sample_size:]

    # The stored-forecast file depends only on ``parameters``, so pick it once.
    # NOTE(review): hard-coded, machine-specific paths.
    if np.isnan(parameters['NeweyWest_volatility_lags']):
        covariance_path = '/Users/rice/Desktop/covariance/daliy_eigen.pkl'
    elif parameters == shortTermParameters:
        covariance_path = '/Users/rice/Desktop/covariance/short_eigen.pkl'
    else:
        covariance_path = '/Users/rice/Desktop/covariance/long_eigen.pkl'

    # Load once and close the handle; the file used to be reopened (and never
    # closed) for every trading date.
    with open(covariance_path, 'rb') as covariance_file:
        stored_covariances = pickle.load(covariance_file)

    forecast_factor_volatility = pd.DataFrame()
    for trading_date in trading_dates:
        previous_factor_covariance = stored_covariances[trading_date]
        # Forecast volatility = square root of the covariance diagonal.
        forecast_factor_volatility[trading_date] = pd.Series(
            data=np.diag(previous_factor_covariance),
            index=factor_covariance.index).pow(0.5)

    # De-annualise to obtain daily volatility.
    daily_factor_volatility = forecast_factor_volatility / np.sqrt(252)

    exp_weight = get_exponential_weight(half_life, sample_size)

    # Per-day bias statistic: root mean square of the standardised returns.
    cross_sectional_bias = (
        factor_return.loc[trading_dates[0]:trading_dates[-1]] /
        daily_factor_volatility.T).pow(2).mean(axis=1).pow(0.5)
    factor_volatility_multiplier = cross_sectional_bias.dot(exp_weight)

    VRA_adjusted_covariance = factor_covariance * (
        factor_volatility_multiplier**2)
    return VRA_adjusted_covariance
def get_implicit_factor_return(date):
    """Estimate implicit factor returns for stocks that traded on ``date``.

    The universe is every 'CS' instrument listed on the previous trading day
    that shows a positive traded volume on ``date``; exposures are measured
    on the previous trading day.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :return: the result of ``factor_return_estimation(date, exposures)``.
    """
    next_calendar_day = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = str(
        rqdatac.get_previous_trading_date(next_calendar_day))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Stocks already listed on the previous trading day, so that a daily
    # return can be computed.
    listed = rqdatac.all_instruments(type='CS', date=previous_trading_date)
    listed_ids = listed['order_book_id'].values.tolist()

    # Keep only the stocks that actually traded on ``date``.
    trading_volume = rqdatac.get_price(listed_ids,
                                       start_date=date,
                                       end_date=date,
                                       frequency='1d',
                                       fields='volume',
                                       country='cn')
    volume_on_date = trading_volume.loc[date]
    traded_ids = volume_on_date[volume_on_date.values > 0].index.tolist()

    # Whole-market exposures on the previous trading day.
    factor_exposure = get_exposure(traded_ids, str(previous_trading_date))

    return factor_return_estimation(date, factor_exposure)
def get_factor_covariance(date, parameters):
    """Build the fully adjusted factor covariance matrix for ``date``.

    Factor returns for industry, style and country factors are fed through
    the Newey-West adjustment, the eigenfactor risk adjustment and finally
    the volatility regime adjustment.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :param parameters: passed through to the return builder and the
        adjustment helpers.
    :return: the value returned by ``volatility_regime_adjustment``.
    """
    industry_factors = [
        'CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN',
        'CNE5S_MATERIAL', 'CNE5S_AERODEF', 'CNE5S_BLDPROD', 'CNE5S_CNSTENG',
        'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH', 'CNE5S_TRDDIST',
        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN',
        'CNE5S_AUTO', 'CNE5S_HOUSEDUR', 'CNE5S_LEISLUX', 'CNE5S_CONSSERV',
        'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',
        'CNE5S_FOODPROD', 'CNE5S_HEALTH', 'CNE5S_BANKS', 'CNE5S_DVFININS',
        'CNE5S_REALEST', 'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE',
    ]
    style_factors = [
        'CNE5S_BETA', 'CNE5S_MOMENTUM', 'CNE5S_SIZE', 'CNE5S_EARNYILD',
        'CNE5S_RESVOL', 'CNE5S_GROWTH', 'CNE5S_BTOP', 'CNE5S_LEVERAGE',
        'CNE5S_LIQUIDTY', 'CNE5S_SIZENL',
    ]
    country_factor = ['CNE5S_COUNTRY']
    all_factors = industry_factors + style_factors + country_factor

    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = rqdatac.get_previous_trading_date(day_after)

    returns_bundle = get_multiperiod_factor_returns(all_factors,
                                                    latest_trading_date,
                                                    parameters)
    current_factor_return, multiperiod_factor_returns = returns_bundle

    # Empirical covariance matrix, annualised (multiplied by 252). It is
    # computed and reshaped here but not consumed by the steps below.
    empirical_factor_covariance = current_factor_return.cov().stack() * 252
    empirical_factor_covariance.index.names = ['factor', '_factor']
    reformatted_empirical_factor_covariance = (
        empirical_factor_covariance.reset_index())

    # Stage 1: Newey-West adjustment (only the first two results are used).
    newey_west_results = Newey_West_adjustment(current_factor_return,
                                               multiperiod_factor_returns,
                                               all_factors, parameters)
    (Newey_West_adjustment_cov, factor_volitality, correlation_matrix,
     estimated_cov) = newey_west_results

    # Stage 2: eigenfactor risk adjustment.
    eigenfactor_risk_adjustment_cov = eigenfactor_risk_adjustment(
        Newey_West_adjustment_cov, factor_volitality, all_factors)

    # Stage 3: volatility regime adjustment.
    return volatility_regime_adjustment(eigenfactor_risk_adjustment_cov,
                                        current_factor_return, parameters)
def get_factor_covariance(date, parameters):
    """Return the Newey-West and eigenfactor adjusted factor covariances.

    The covariance is rebuilt as adjusted correlation times the outer product
    of the adjusted volatilities, then passed through the eigenfactor risk
    adjustment. The volatility regime adjustment step is currently disabled.

    :param date: date in ``"%Y-%m-%d"`` form (converted with ``str`` first).
    :param parameters: settings for the two Newey-West adjustments.
    :return: tuple ``(NeweyWest_adjusted_covariance,
        eigenfactor_adjusted_covariance)``.
    """
    parsed_date = datetime.strptime(str(date), "%Y-%m-%d")
    latest_trading_date = rqdatac.get_previous_trading_date(
        parsed_date + timedelta(days=1))

    # The multi-period returns are always built with longTermParameters,
    # independently of ``parameters``.
    multiperiod_factor_returns = get_multiperiod_factor_returns(
        latest_trading_date, longTermParameters)

    adjusted_volatility = volatility_NeweyWest_adjustment(
        multiperiod_factor_returns, parameters)
    adjusted_correlation = correlation_NeweyWest_adjustment(
        multiperiod_factor_returns, parameters)

    # sigma_i * sigma_j for every pair of factors.
    factor_labels = adjusted_volatility.index
    volatility_outerproduct = pd.DataFrame(np.outer(adjusted_volatility,
                                                    adjusted_volatility),
                                           index=factor_labels,
                                           columns=factor_labels)

    NeweyWest_adjusted_covariance = (adjusted_correlation *
                                     volatility_outerproduct)
    eigenfactor_adjusted_covariance = eigenfactor_risk_adjustment(
        NeweyWest_adjusted_covariance)

    # VRA_adjusted_covariance = volatility_regime_adjustment(eigenfactor_adjusted_covariance, multiperiod_factor_returns['current'], date, parameters)
    return NeweyWest_adjusted_covariance, eigenfactor_adjusted_covariance
def get_momentum_and_res_vol(date):
    """Compute momentum and residual-volatility exposures with imputation.

    :param date: str, calendar date in ``"%Y-%m-%d"`` format.
    :return: tuple ``(imputed_atomic_descriptors,
        imputed_style_factors_exposure)``: the three residual-volatility
        descriptors (daily_standard_deviation, cumulative_range,
        historical_sigma) and the two style factors (momentum,
        residual_volatility), both after missing-value imputation.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    # 500 calendar days are fetched so that the 252nd-last trading date exists.
    recent_trading_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=500),
        latest_trading_date,
        country='cn')
    trading_date_252_before = recent_trading_dates[-252]

    stock_list = rqdatac.all_instruments(
        type='CS', date=latest_trading_date)['order_book_id'].values.tolist()

    # Market and financial data shared by the factor calculations; only the
    # market cap and the two excess-return series are used here.
    (_recent_report_type, _annual_report_type, market_cap_on_current_day,
     stock_excess_return, market_portfolio_excess_return,
     _recent_five_annual_shares, _last_reported_non_current_liabilities,
     _last_reported_preferred_stock) = get_financial_and_market_data(
         stock_list, latest_trading_date, trading_date_252_before)

    # Momentum and residual volatility.
    market_portfolio_beta, market_portfolio_beta_exposure = (
        get_market_portfolio_beta(stock_excess_return,
                                  market_portfolio_excess_return,
                                  market_cap_on_current_day))
    momentum = get_momentum(stock_list, latest_trading_date,
                            market_cap_on_current_day)
    (daily_standard_deviation, cumulative_range, historical_sigma,
     residual_volatility) = get_residual_volatility(
         stock_list, latest_trading_date, stock_excess_return,
         market_portfolio_excess_return, market_cap_on_current_day,
         market_portfolio_beta_exposure, market_portfolio_beta)

    style_factors_exposure = pd.concat([momentum, residual_volatility],
                                       axis=1)
    style_factors_exposure.columns = ['momentum', 'residual_volatility']

    atomic_descriptors_exposure = pd.concat(
        [daily_standard_deviation, cumulative_range, historical_sigma],
        axis=1)
    atomic_descriptors_exposure.columns = [
        'daily_standard_deviation', 'cumulative_range', 'historical_sigma'
    ]

    # The financial data covers the current whole market and may include
    # stocks not yet listed on the calculation date, so restrict both frames
    # to stock_list.
    atomic_descriptors_exposure = atomic_descriptors_exposure.loc[stock_list]
    style_factors_exposure = style_factors_exposure.loc[stock_list]

    date_label = latest_trading_date.strftime('%Y-%m-%d')

    # Regression-based imputation of each descriptor.
    imputed_atomic_descriptors = pd.DataFrame()
    for descriptor_name in atomic_descriptors_exposure.columns:
        imputed_atomic_descriptors[descriptor_name] = (
            individual_factor_imputation(
                stock_list, atomic_descriptors_exposure[descriptor_name],
                market_cap_on_current_day, date_label))

    # Regression-based imputation of the style factor exposures.
    imputed_style_factors_exposure = style_factors_imputation(
        style_factors_exposure, market_cap_on_current_day, date_label)

    # If anything is still missing, fall back to a whole-market regression.
    style_still_missing = imputed_style_factors_exposure.isnull().sum().sum()
    if style_still_missing > 0:
        imputed_style_factors_exposure = factor_imputation(
            market_cap_on_current_day, imputed_style_factors_exposure)

    atomic_still_missing = imputed_atomic_descriptors.isnull().sum().sum()
    if atomic_still_missing > 0:
        imputed_atomic_descriptors = factor_imputation(
            market_cap_on_current_day, imputed_atomic_descriptors)

    return imputed_atomic_descriptors, imputed_style_factors_exposure
def optimizer(order_book_ids,
              start_date,
              asset_type,
              method,
              current_weight=None,
              bnds=None,
              cons=None,
              expected_return=None,
              expected_return_covar=None,
              risk_aversion_coefficient=1,
              windows=None,
              out_threshold_coefficient=None,
              data_freq=None,
              fun_tol=10**-8,
              max_iteration=10**3,
              disp=False,
              iprint=1,
              cov_enhancement=True,
              benchmark=None):
    """Compute optimal portfolio weights with the selected model.

    :param order_book_ids: str list. A list of assets (stocks or funds).
        Optional when expected_return_covar is given;
    :param start_date: str. Date to initialize a portfolio or re-balance a
        portfolio. Optional when expected_return_covar is given;
    :param asset_type: str. "stock" or "fund". Types of portfolio candidates,
        portfolio with mixed assets is not supported;
    :param method: str. Portfolio optimization model: "risk_parity",
        "min_variance", "mean_variance", "risk_parity_with_con", "min_TE",
        "all" ("all" only contains "risk_parity", "min_variance" and
        "risk_parity_with_con"). When "min_TE" is chosen,
        expected_return_covar must be None.
    :param current_weight: floats list, optional. Default: 1/N (N: no. of
        assets). Initial guess for optimization.
    :param bnds: dict, optional. Lower and upper bounds for each asset in the
        portfolio. Supported formats:
        {"asset_code1": (lb1, up1), "asset_code2": (lb2, up2), ...} or
        {'full_list': (lb, up)} (universal bounds for all assets);
    :param cons: dict, optional. Lower and upper bounds for each category of
        assets in the portfolio. Supported fund types: Bond, Stock, Hybrid,
        Money, ShortBond, StockIndex, BondIndex, Related, QDII, Other;
        supported stock sectors: Shenwan_industry_name;
        cons: {"types1": (lb1, up1), "types2": (lb2, up2), ...};
    :param expected_return: pandas DataFrame. Default: means of the returns
        of order_book_ids within windows (empirical means). Must be given if
        expected_return_covar is given and "mean_variance" is run.
    :param expected_return_covar: pandas DataFrame, optional. Covariance
        matrix of expected return. Default: covariance estimated from the
        returns of order_book_ids within windows. If given, every model that
        involves a covariance matrix uses it instead of estimating one from
        sample data.
    :param risk_aversion_coefficient: float, optional. Risk aversion
        coefficient of the Mean-Variance model. Default: 1.
    :param windows: int, optional. Default: 132. Data window length.
    :param out_threshold_coefficient: float, optional. Assets whose data
        length is shorter than the threshold are eliminated.
        Default: 0.5 (out_threshold = 0.5*windows).
    :param data_freq: str, optional. Default: "D". Supported: "D" daily,
        "W" weekly, "M" monthly data. Weekly/monthly data takes the close
        price at the end of each week/month; suspended days and public
        holidays are not considered, so use a slightly larger window.
    :param fun_tol: float, optional. Optimization accuracy requirement. The
        smaller, the more accurate, but the slower. Default: 1e-8.
    :param max_iteration: int, optional. Max number of iterations allowed
        during optimization. Default: 1000.
    :param disp: bool, optional. Optimization summary display control.
        Overrides iprint. Default: False.
    :param iprint: int, optional. The verbosity of optimization:
        * iprint <= 0 : Silent operation;
        * iprint == 1 : Print summary upon completion (default);
        * iprint >= 2 : Print status of each iterate and summary.
    :param cov_enhancement: bool, optional. Default: True. Use the shrinkage
        method based on Ledoit and Wolf (2003) to improve the sample
        covariance estimate. Recommended when the stock pool is large.
    :param benchmark: str, optional. Target to track in the minimum tracking
        error ("min_TE") method.
    :return: pandas DataFrame with the assets and their optimal weights;
        pandas DataFrame, the covariance matrix used for optimization;
        pandas DataFrame with the order_book_ids filtered out and the reasons
        (only when expected_return_covar is None);
        optimization message (str for a single method, DataFrame for "all").
    :raises OptimizationError: if fewer than two assets survive cleaning, if
        "min_TE" is requested without a benchmark, or if an optimization
        fails for a reason other than hitting the iteration limit.
    """
    if not disp:
        iprint = 0
    opts = {
        'maxiter': max_iteration,
        'ftol': fun_tol,
        'iprint': iprint,
        'disp': disp
    }
    # L-BFGS-B verbosity levels; the key is clamped to 0..2 so that
    # iprint values outside that range no longer raise KeyError.
    log_barrier_risk_parity_iprint = {0: -1, 1: 0, 2: 1}
    log_barrier_risk_parity_opts = {
        'disp': log_barrier_risk_parity_iprint[min(max(disp * iprint, 0), 2)]
    }

    if data_freq is None:
        data_freq = "D"
    if windows is None:
        windows = 132

    if expected_return_covar is None:
        # Get clean data and estimate the covariance matrix from it
        data_after_processing = data_process(order_book_ids, asset_type,
                                             start_date, windows, data_freq,
                                             out_threshold_coefficient)
        clean_period_prices = data_after_processing[0]
        reset_start_date = data_after_processing[2]

        # At least two assets are needed
        if clean_period_prices.shape[1] <= 1:
            raise OptimizationError("错误:数据剔除后order_book_ids数量不足。")

        # Generate enhanced estimation for covariance matrix
        period_daily_return_pct_change = clean_period_prices.pct_change()[1:]
        if cov_enhancement:
            c_m = cov_shrinkage(clean_period_prices)[0]
        else:
            c_m = period_daily_return_pct_change.cov()

        # Initial guess: equal weights, or the caller's weights restricted to
        # the assets that survived cleaning
        if current_weight is None:
            current_weight = [1 / clean_period_prices.shape[1]
                              ] * clean_period_prices.shape[1]
        else:
            new_current_weight = current_weight
            current_weight = list()
            for i in clean_period_prices.columns.values:
                current_weight.append(
                    new_current_weight[order_book_ids.index(i)])

        # Expected return: empirical means, overridden by the caller's values
        # where provided
        if method == "mean_variance":
            empirical_mean = period_daily_return_pct_change.mean()
            if expected_return is None:
                expected_return = empirical_mean
            else:
                for i in expected_return.index.values:
                    if i in empirical_mean.index.values:
                        empirical_mean.loc[i] = expected_return.loc[i]
                expected_return = empirical_mean
    else:
        # A covariance matrix was supplied: no price data is loaded
        c_m = expected_return_covar
        if current_weight is None:
            current_weight = [1 / c_m.shape[0]] * c_m.shape[0]
        order_book_ids = list(c_m.columns.values)

    # Read benchmark data for min tracking error model
    if method == "min_TE":
        if benchmark is None:
            raise OptimizationError("错误:没有选择基准。")
        benchmark_price = rqdatac.get_price(
            benchmark,
            start_date=reset_start_date,
            end_date=rqdatac.get_previous_trading_date(start_date),
            fields="close")
        if data_freq != "D":
            benchmark_price_change = benchmark_price.asfreq(
                data_freq, method="pad").pct_change()[1:]
        else:
            benchmark_price_change = benchmark_price.pct_change()[1:]

    # Generate bounds
    clean_order_book_ids = list(c_m.columns.values)
    if method == "all":
        log_rp_bnds, general_bnds = bounds_gen(order_book_ids,
                                               clean_order_book_ids, method,
                                               bnds)
    elif method == "risk_parity":
        log_rp_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method,
                                 bnds)
    else:
        general_bnds = bounds_gen(order_book_ids, clean_order_book_ids,
                                  method, bnds)

    # Generate constraints
    if method != "risk_parity":
        # Test hook (added by zs on 0705): cons == 1 builds an equal band of
        # +/- 0.03 around 1/len(types) for every asset type found.
        # NOTE(review): needs clean_period_prices, i.e. only works when
        # expected_return_covar is None - confirm intended.
        if cons == 1:
            clean_order_book_ids = list(clean_period_prices.columns)
            df1 = pd.DataFrame(index=clean_order_book_ids, columns=['type'])
            if asset_type == 'fund':
                for i in clean_order_book_ids:
                    df1.loc[i, 'type'] = fund.instruments(i).fund_type
            elif asset_type == 'stock':
                for i in clean_order_book_ids:
                    df1.loc[i, "type"] = rqdatac.instruments(
                        i).shenwan_industry_name
            all_types = df1['type'].unique()
            cons_num = 1 / len(all_types)
            cons = {}
            for i in all_types:
                cons[i] = (cons_num - 0.03, cons_num + 0.03)
        general_cons = general_constraints_gen(order_book_ids,
                                               clean_order_book_ids,
                                               asset_type, cons)

    def _failure_suffix():
        # Tag failure messages with the first sample date when price data was
        # loaded; with a user-supplied covariance there is no such date.
        if expected_return_covar is None:
            return ' @ %s' % clean_period_prices.index[0]
        return ''

    def _slsqp_outcome(optimization_res, failure_prefix):
        # Shared result handling of the SLSQP models: hitting the iteration
        # limit still returns the current point, any other failure raises.
        if optimization_res.success or optimization_res.nit >= max_iteration:
            return optimization_res.x, optimization_res.message
        raise OptimizationError(failure_prefix +
                                str(optimization_res.message) +
                                _failure_suffix())

    # Log barrier risk parity model
    c = 15

    def log_barrier_risk_parity_obj_fun(x):
        return np.dot(x, np.dot(c_m, x)) - c * sum(np.log(x))

    def log_barrier_risk_parity_gradient(x):
        return np.multiply(2, np.dot(c_m, x)) - np.multiply(
            c, np.reciprocal(x))

    def log_barrier_risk_parity_optimizer():
        optimization_res = sc_opt.minimize(
            log_barrier_risk_parity_obj_fun,
            current_weight,
            method='L-BFGS-B',
            jac=log_barrier_risk_parity_gradient,
            bounds=log_rp_bnds,
            options=log_barrier_risk_parity_opts)
        if optimization_res.success:
            optimization_info = "Optimization terminated successfully."
        elif optimization_res.nit >= max_iteration:
            optimization_info = "Iteration limit exceeded"
        else:
            raise OptimizationError('错误:risk_parity 算法优化失败,' +
                                    str(optimization_res.message) +
                                    _failure_suffix())
        # The unconstrained solution is normalised so the weights sum to one
        optimal_weights = (optimization_res.x / sum(optimization_res.x))
        return optimal_weights, optimization_info

    # Risk parity with constraints model
    def risk_parity_with_con_obj_fun(x):
        temp1 = np.multiply(x, np.dot(c_m, x))
        temp2 = temp1[:, None]
        return np.sum(scsp.distance.pdist(temp2, "euclidean"))

    def risk_parity_with_con_optimizer():
        optimization_res = sc_opt.minimize(risk_parity_with_con_obj_fun,
                                           current_weight,
                                           method='SLSQP',
                                           bounds=general_bnds,
                                           constraints=general_cons,
                                           options=opts)
        return _slsqp_outcome(optimization_res,
                              '错误:带限制条件的risk_parity 算法优化失败,')

    # Min variance model
    def min_variance_obj_fun(x):
        return np.dot(np.dot(x, c_m), x)

    def min_variance_gradient(x):
        return np.multiply(2, np.dot(c_m, x))

    def min_variance_optimizer():
        optimization_res = sc_opt.minimize(min_variance_obj_fun,
                                           current_weight,
                                           method='SLSQP',
                                           jac=min_variance_gradient,
                                           bounds=general_bnds,
                                           constraints=general_cons,
                                           options=opts)
        return _slsqp_outcome(optimization_res, '错误:min_variance 算法优化失败,')

    # Mean variance model
    def mean_variance_obj_fun(x):
        return (np.multiply(risk_aversion_coefficient / 2,
                            np.dot(np.dot(x, c_m), x)) -
                np.dot(x, expected_return))

    def mean_variance_gradient(x):
        # np.asarray(..., dtype=float) replaces np.asfarray, which was
        # removed in NumPy 2.0.
        return np.asarray(
            np.multiply(risk_aversion_coefficient,
                        np.dot(x, c_m)).transpose() - expected_return,
            dtype=float).flatten()

    def mean_variance_optimizer():
        optimization_res = sc_opt.minimize(mean_variance_obj_fun,
                                           current_weight,
                                           method='SLSQP',
                                           jac=mean_variance_gradient,
                                           bounds=general_bnds,
                                           constraints=general_cons,
                                           options=opts)
        return _slsqp_outcome(optimization_res, '错误:mean_variance 算法优化失败,')

    # Minimizing tracking error model
    def min_TE_obj_fun(x):
        tracking_diff = np.subtract(
            benchmark_price_change,
            np.dot(period_daily_return_pct_change, x))
        return np.dot(tracking_diff.T, tracking_diff)

    def min_TE_optimizer():
        optimization_res = sc_opt.minimize(min_TE_obj_fun,
                                           current_weight,
                                           method='SLSQP',
                                           bounds=general_bnds,
                                           constraints=general_cons,
                                           options=opts)
        return _slsqp_outcome(optimization_res, '错误:min_TE 算法优化失败,')

    opt_dict = {
        'risk_parity': log_barrier_risk_parity_optimizer,
        'min_variance': min_variance_optimizer,
        'mean_variance': mean_variance_optimizer,
        'risk_parity_with_con': risk_parity_with_con_optimizer,
        "min_TE": min_TE_optimizer,
        'all': [
            log_barrier_risk_parity_optimizer, min_variance_optimizer,
            risk_parity_with_con_optimizer
        ]
    }

    asset_index = list(c_m.columns.values)

    if method != 'all':
        # Run the optimization once and reuse both parts of its result
        optimal_weights, optimization_info = opt_dict[method]()
        weights = pd.DataFrame(optimal_weights,
                               index=asset_index,
                               columns=[method])
        if expected_return_covar is None:
            return weights, c_m, data_after_processing[1], optimization_info
        # This branch previously built the tuple without returning it
        return weights, c_m, optimization_info

    temp1 = pd.DataFrame(
        index=asset_index,
        columns=['risk_parity', 'min_variance', "risk_parity_with_con"])
    temp2 = pd.DataFrame(
        index=["risk_parity", "min_variance", "risk_parity_with_con"],
        columns=["Opt Res Message"])
    for n, f in enumerate(opt_dict[method]):
        optimal_weights, optimization_info = f()
        temp1.iloc[:, n] = optimal_weights
        temp2.iloc[n, 0] = optimization_info
    if expected_return_covar is None:
        return temp1, c_m, data_after_processing[1], temp2
    return temp1, c_m, temp2
def _kickout_reason(observations, listed_date, de_listed_date, end_date,
                    out_threshold, reasons, suspended_days=0):
    """
    Decide whether a single asset has to be removed from the sample.

    The tests are evaluated in a fixed order and the first one that fails
    determines the reason: no data at all, listed too recently, already
    delisted, suspended too often, too many missing observations.

    :param observations: pandas Series. Observations of the asset over the sample period (volume for daily stock
        data, price/NAV otherwise).
    :param listed_date: pandas Timestamp. Listing date of the asset.
    :param de_listed_date: str. Delisting date; "0000-00-00" means the asset is still listed.
    :param end_date: pandas Timestamp. Last day of the sample period.
    :param out_threshold: int. Number of suspended/missing observations from which on the asset is eliminated.
    :param reasons: dict. Maps "no_data", "new_listed", "de_listed", "missing" (and "suspended" when
        suspended_days is used) to the text stored as elimination reason.
    :param suspended_days: int, optional. Number of zero-volume days in the sample. Default: 0 (test disabled).
    :return: str or None. The elimination reason, or None when the asset passes every test.
    """
    if not observations.notnull().any():
        return reasons["no_data"]
    # NOTE(review): this compares calendar days although the reason texts speak of trading days.
    if (end_date - listed_date).days <= 132:
        return reasons["new_listed"]
    if de_listed_date != "0000-00-00" and pd.to_datetime(de_listed_date) < end_date:
        return reasons["de_listed"]
    if suspended_days > 0 and suspended_days >= out_threshold:
        return reasons["suspended"]
    if observations.isnull().sum() >= out_threshold:
        return reasons["missing"]
    return None


def data_process(order_book_ids, asset_type, start_date, windows, data_freq, out_threshold_coefficient=None):
    """
    Clean data for covariance matrix calculation

    :param order_book_ids: str list. A selected list of assets.
    :param asset_type: str. "fund" or "stock"
    :param start_date: str. The first day for backtest.
    :param windows: int. Interval length for sample.
    :param out_threshold_coefficient: float, optional. Determine the threshold to filter out assets with too short data
    which may cause problem in covariance matrix calculation. Whose data length is shorter than threshold will
    be eliminated. Default: 0.5(out_threshold = 0.5*windows).
    :param data_freq: str. Support input: "D": daily data; "W": weekly data; "M": monthly data.
    Weekly data means the close price at the end of each week is taken; monthly means the close price at the end of each
    month. When weekly and monthly data are used, suspended days issues will not be considered. In addition, weekly and
    monthly data don't consider public holidays which have no trading. Users should use a windows a little bit larger
    to get desired data length.
    Users should be very careful when using weekly or monthly data to avoid the observations have too short length.
    :return:
    pandas DataFrame. Contain the prices after cleaning;
    pandas DataFrame. The order_book_ids filtered out and the reasons of elimination;
    pandas Timestamp. A new start date for covariance calculation which may differ from default windows setting.
    :raise ValueError: if asset_type is neither "fund" nor "stock".
    """
    # Strings are compared by value; identity ("is") only works by accident of interning.
    if asset_type not in ("fund", "stock"):
        raise ValueError('asset_type must be "fund" or "stock", got %r' % (asset_type,))

    end_date = pd.to_datetime(rqdatac.get_previous_trading_date(start_date))

    # Choose the start date based on the windows inputted. Trading dates are looked up from "2005-01-01" onwards, so
    # a sample reaching further back raises IndexError. The windows for weekly and monthly data don't consider any
    # public holidays which have no trading.
    windows_dict = {
        "D": -(windows + 1),
        "W": -(windows + 1) * 5,
        "M": -(windows + 1) * 22
    }
    start_date = rqdatac.get_trading_dates("2005-01-01", end_date)[windows_dict[data_freq]]
    reset_start_date = pd.to_datetime(start_date)

    period_volume = None
    if asset_type == "fund":
        period_prices = rqdatac.fund.get_nav(order_book_ids, reset_start_date, end_date,
                                             fields='adjusted_net_value')
    else:
        period_data = rqdatac.get_price(order_book_ids, reset_start_date, end_date,
                                        frequency='1d', fields=['close', 'volume'])
        period_prices = period_data['close']
        period_volume = period_data['volume']
    if data_freq != "D":
        period_prices = period_prices.asfreq(data_freq, method="pad")

    # Set up the threshold of elimination
    if out_threshold_coefficient is None:
        out_threshold = ceil(windows * 0.5)
    else:
        out_threshold = ceil(windows * out_threshold_coefficient)

    # Reason texts are runtime data that ends up in the returned DataFrame; they differ per asset type/frequency.
    if asset_type == "fund":
        instruments = rqdatac.fund.instruments
        reasons = {"new_listed": "基金发行时间少于132个交易日", "de_listed": "基金已清算",
                   "missing": "缺失值过多", "no_data": "无相关基金数据"}
    elif data_freq == "D":
        instruments = rqdatac.instruments
        reasons = {"new_listed": "上市时间少于132个交易日", "de_listed": "已退市",
                   "suspended": "停牌交易日数量过多", "missing": "缺失值过多",
                   "no_data": "无相关股票数据"}
    else:
        instruments = rqdatac.instruments
        reasons = {"new_listed": "股票上市时间少于132个交易日", "de_listed": "股票已退市",
                   "missing": "缺失值过多", "no_data": "无相关股票数据"}

    # Only daily stock data is screened on traded volume (long suspensions); every other combination is screened on
    # the price/NAV series itself.
    screen_on_volume = asset_type == "stock" and data_freq == "D"

    # Check whether any asset has long suspended trading periods, has been delisted, was listed too recently or
    # misses too many observations. Unlike a nested elif/if chain, a test that does not fire never hides the tests
    # that follow it.
    kicked_ids = []
    kicked_reasons = []
    for i in order_book_ids:
        instrument_i = instruments(i)
        instrument_i_listed_date = pd.to_datetime(instrument_i.listed_date)
        if screen_on_volume:
            observations_i = period_volume.loc[:, i]
            suspended_days_i = (observations_i == 0).sum()
        else:
            observations_i = period_prices.loc[:, i]
            suspended_days_i = 0
        reason_i = _kickout_reason(observations_i, instrument_i_listed_date, instrument_i.de_listed_date,
                                   end_date, out_threshold, reasons, suspended_days_i)
        if reason_i is not None:
            kicked_ids.append(i)
            kicked_reasons.append(reason_i)

    # Built in one go instead of repeated DataFrame.append (removed in pandas 2.0).
    kickout_assets = pd.DataFrame({"剔除原因": kicked_reasons}, index=kicked_ids, columns=["剔除原因"])

    period_prices = period_prices.ffill()

    # Generate clean data and keep the original input id order (duplicates in the input are collapsed).
    kicked = set(kicked_ids)
    clean_order_book_ids = [i for i in dict.fromkeys(order_book_ids) if i not in kicked]
    clean_period_prices = period_prices.loc[reset_start_date:end_date, clean_order_book_ids]
    return clean_period_prices, kickout_assets, reset_start_date
def black_litterman_prep(order_book_ids, start_date, investors_views, investors_views_indicate_M,
                         investors_views_uncertainty=None, asset_type=None, market_weight=None,
                         risk_free_rate_tenor=None, risk_aversion_coefficient=None,
                         excess_return_cov_uncertainty=None, confidence_of_views=None,
                         windows=None, data_freq=None):
    """
    Generate expected return and expected return covariance matrix with Black-Litterman model. Suppose we have N assets
    and K views. It's highly recommended to use your own ways to create investors_views_uncertainty,
    risk_aversion_coefficient and excess_return_cov_uncertainty beforehand to get the desired distribution parameters.

    :param order_book_ids: str list. A group of assets;
    :param asset_type: str. "fund" or "stock". Default: "fund";
    :param start_date: str. The first day of backtest period;
    :param windows: int. Interval length of sample; Default: 132;
    :param investors_views: K*1 numpy matrix. Each row represents one view;
    :param investors_views_indicate_M: K*N numpy matrix. Each row corresponds to one view. Indicate which view is
    involved during calculation;
    :param investors_views_uncertainty: K*K diagonal matrix, optional. If it is skipped, He and Litterman's method will
    be called to generate diagonal matrix if confidence_of_view is also skipped; Idzorek's method will be called if
    confidence_of_view is passed in; Has to be non-singular;
    :param market_weight: floats list or pandas DataFrame indexed by order_book_id, optional. Weights for market
    portfolio; a plain list is taken to be in the order of order_book_ids. Default: Equal weights portfolio;
    :param risk_free_rate_tenor: str, optional. Risk free rate term. Default: "0S"; Support input: "0S", "1M", "3M",
    "6M", "1Y";
    :param risk_aversion_coefficient: float, optional. If no risk_aversion_coefficient is passed in, then
    risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility;
    :param excess_return_cov_uncertainty: float, optional. Default: 1/T where T is the time length of sample;
    :param confidence_of_views: floats list, optional. Represent investors' confidence levels on each view.
    :param data_freq: str. Support input: "D": daily data; "W": weekly data; "M": monthly data. Default: "D".
    Weekly data means the close price at the end of each week is taken; monthly means the close price at the end of each
    month. When weekly and monthly data are used, suspended days issues will not be considered. In addition, weekly and
    monthly data don't consider public holidays which have no trading. Users should use a windows a little bit larger
    to get desired data length.
    Users should be very careful when using weekly or monthly data to avoid the observations have too short length.
    :return:
    numpy matrix. Expected return vector;
    numpy matrix. Covariance matrix of expected return;
    float. risk_aversion_coefficient;
    numpy ndarray. investors_views_uncertainty.
    """
    # Number of days each supported tenor is taken to span; used to rescale the quoted yield.
    risk_free_rate_dict = {'0S': 1, '1M': 30, '3M': 92, '6M': 183, '1Y': 365}

    if market_weight is None:
        market_weight = pd.DataFrame([1 / len(order_book_ids)] * len(order_book_ids), index=order_book_ids)
    elif not isinstance(market_weight, (pd.DataFrame, pd.Series)):
        # A plain list of floats (as documented) has no labels; align it with order_book_ids.
        market_weight = pd.DataFrame(list(market_weight), index=order_book_ids)
    if windows is None:
        windows = 132
    if data_freq is None:
        data_freq = "D"
    if asset_type is None:
        asset_type = "fund"
    if risk_free_rate_tenor is None:
        risk_free_rate_tenor = "0S"

    # Clean data. data_process is called exactly once; indexing a generator over it would download everything twice.
    end_date = pd.to_datetime(rqdatac.get_previous_trading_date(start_date))
    processed = data_process(order_book_ids, asset_type, start_date, windows, data_freq)
    clean_period_prices, reset_start_date = processed[0], processed[2]

    if excess_return_cov_uncertainty is None:
        excess_return_cov_uncertainty = 1 / clean_period_prices.shape[0]

    # Fetch risk free rate data
    reset_start_date = rqdatac.get_next_trading_date(reset_start_date)
    risk_free_rate = rqdatac.get_yield_curve(reset_start_date, end_date,
                                             tenor=risk_free_rate_tenor, country='cn')
    if data_freq != "D":
        risk_free_rate = risk_free_rate.asfreq(data_freq, method="pad")
    # The rescaled rate is stored in a column named after the data frequency.
    risk_free_rate[data_freq] = pd.Series(
        np.power(1 + risk_free_rate.iloc[:, 0], risk_free_rate_dict[risk_free_rate_tenor] / 365) - 1,
        index=risk_free_rate.index)

    # Calculate risk premium for each equity
    clean_period_prices_pct_change = clean_period_prices.pct_change()
    clean_period_excess_return = clean_period_prices_pct_change.subtract(risk_free_rate[data_freq], axis=0)

    # Wash out the ones in kick_out_list and renormalise the remaining weights
    clean_market_weight = market_weight.loc[clean_period_prices.columns.values]
    clean_market_weight = clean_market_weight.div(clean_market_weight.sum())

    # If no risk_aversion_coefficient is passed in, then
    # risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility
    if risk_aversion_coefficient is None:
        market_portfolio_return = np.dot(clean_period_prices_pct_change, clean_market_weight)
        # The first row of pct_change is NaN and is skipped.
        risk_aversion_coefficient = ((market_portfolio_return[1:].mean() - risk_free_rate[data_freq].mean()) /
                                     market_portfolio_return[1:].var())

    clean_period_excess_return_cov = clean_period_excess_return[1:].cov()
    equilibrium_return = np.multiply(np.dot(clean_period_excess_return_cov, clean_market_weight),
                                     risk_aversion_coefficient)

    # Inverse of the scaled covariance; needed by Idzorek's method and by the final combination.
    scaled_cov_inv = np.linalg.inv(np.multiply(excess_return_cov_uncertainty, clean_period_excess_return_cov))
    prior_term = np.dot(scaled_cov_inv, equilibrium_return)

    # Generate the investors_views_uncertainty matrix if none is passed in
    if investors_views_uncertainty is None:
        Omeg_diag = list()
        if confidence_of_views is None:
            # He and Litterman's (1999) method to generate the uncertainty diagonal matrix; no confidence level on
            # the views is needed.
            for i in range(investors_views_indicate_M.shape[0]):
                view_row = investors_views_indicate_M[i, :]
                temp = np.dot(np.dot(view_row, clean_period_excess_return_cov),
                              view_row.transpose()) * excess_return_cov_uncertainty
                Omeg_diag.append(temp.item(0))
        else:
            # Idzorek's (2002) method, users can specify their confidence level on each view.
            risk_cov_inv = np.linalg.inv(np.multiply(risk_aversion_coefficient, clean_period_excess_return_cov))
            # Upper bound should be consistent with the magnitude of return
            upper_bound = abs(equilibrium_return.mean()) * 100
            for i in range(len(investors_views)):
                view_row = investors_views_indicate_M[i, :]
                view_col = view_row.transpose()
                view_value = investors_views[i]
                view_outer = np.dot(view_col, view_row)

                part1 = excess_return_cov_uncertainty * np.dot(clean_period_excess_return_cov, view_col)
                part2 = 1 / (excess_return_cov_uncertainty *
                             np.dot(view_row, np.dot(clean_period_excess_return_cov, view_row.T)))
                part3 = view_value - np.dot(view_row, equilibrium_return)
                return_with_full_confidence = equilibrium_return + np.multiply(part2 * part3, part1)
                weights_with_full_confidence = np.dot(risk_cov_inv, return_with_full_confidence)
                temp1 = weights_with_full_confidence - clean_market_weight
                temp2 = np.multiply(confidence_of_views[i], np.absolute(view_col))
                tilt = np.multiply(temp1, temp2)
                # .values replaces DataFrame.as_matrix(), which no longer exists in current pandas.
                weights_with_partial_confidence = clean_market_weight.values + tilt

                def objective_fun(x):
                    # Distance between the target weights and the weights implied by view uncertainty x.
                    posterior_cov = np.linalg.inv(scaled_cov_inv + np.multiply(np.reciprocal(x), view_outer))
                    posterior_term = prior_term + np.multiply(view_value * np.reciprocal(x), view_col)
                    wk = np.dot(risk_cov_inv, np.dot(posterior_cov, posterior_term))
                    return np.linalg.norm(np.subtract(weights_with_partial_confidence, wk))

                omega_k = sc_opt.minimize_scalar(objective_fun, bounds=(10**-8, upper_bound),
                                                 method="bounded", options={"xatol": 10**-8})
                Omeg_diag.append(float(np.ravel(omega_k.x)[0]))
        investors_views_uncertainty = np.diag(Omeg_diag)

    # Combine all the information above to get the distribution of expected return with given views
    views_precision = np.dot(investors_views_indicate_M.transpose(),
                             np.linalg.inv(investors_views_uncertainty))
    combined_return_covar = np.linalg.inv(scaled_cov_inv + np.dot(views_precision, investors_views_indicate_M))
    combined_return_mean = np.dot(combined_return_covar,
                                  prior_term + np.dot(views_precision, investors_views))

    return combined_return_mean, combined_return_covar, risk_aversion_coefficient, investors_views_uncertainty
def black_litterman_prep(order_book_ids, start_date, investors_views, investors_views_indicate_M,
                         investors_views_uncertainty=None, asset_type=None, market_weight=None,
                         risk_free_rate_tenor=None, risk_aversion_coefficient=None,
                         excess_return_cov_uncertainty=None, confidence_of_views=None):
    """
    Generate expected return and expected return covariance matrix with Black-Litterman model on daily data, using a
    fixed sample window of 132 observations.

    :param order_book_ids: str list. A group of assets;
    :param start_date: str. The first day of backtest period;
    :param investors_views: K*1 numpy matrix. Each row represents one view;
    :param investors_views_indicate_M: K*N numpy matrix. Each row corresponds to one view;
    :param investors_views_uncertainty: K*K diagonal matrix, optional. When skipped it is generated with He and
    Litterman's method (no confidence_of_views) or Idzorek's method (confidence_of_views given);
    :param asset_type: str, optional. Passed on to pf.data_process. Default: "fund";
    :param market_weight: pandas DataFrame indexed by order_book_id, optional. Default: equal weights;
    :param risk_free_rate_tenor: str, optional. Yield curve tenor. Default: "0S";
    :param risk_aversion_coefficient: float, optional. Default: market portfolio risk premium / market portfolio
    variance;
    :param excess_return_cov_uncertainty: float, optional. Default: 1/T where T is the sample length;
    :param confidence_of_views: floats list, optional. Confidence level on each view.
    :return:
    Expected return vector; covariance matrix of expected return; risk_aversion_coefficient;
    investors_views_uncertainty.
    :raise ValueError: if risk_free_rate_tenor is not a supported tenor.
    """
    risk_free_rate_dict = ['0S', '1M', '2M', '3M', '6M', '9M', '1Y', '2Y', '3Y', '4Y', '5Y', '6Y', '7Y', '8Y', '9Y',
                           '10Y', '15Y', '20Y', '30Y', '40Y', '50Y']
    windows = 132

    # An unsupported tenor used to surface later as an UnboundLocalError; fail early with a clear message.
    if risk_free_rate_tenor is None:
        risk_free_rate_tenor = '0S'
    elif risk_free_rate_tenor not in risk_free_rate_dict:
        raise ValueError('unsupported risk_free_rate_tenor: %r' % (risk_free_rate_tenor,))

    if market_weight is None:
        market_weight = pd.DataFrame([1 / len(order_book_ids)] * len(order_book_ids), index=order_book_ids)
    if asset_type is None:
        asset_type = "fund"

    # Clean data. The helper is called once; indexing a generator over it would run the download twice.
    # NOTE(review): pf.data_process is called with four arguments here - confirm this matches its signature.
    end_date = pd.to_datetime(rqdatac.get_previous_trading_date(start_date))
    processed = pf.data_process(order_book_ids, asset_type, start_date, windows)
    clean_period_prices, reset_start_date = processed[0], processed[2]

    if excess_return_cov_uncertainty is None:
        excess_return_cov_uncertainty = 1 / clean_period_prices.shape[0]

    reset_start_date = rqdatac.get_next_trading_date(reset_start_date)
    # Take daily risk free rate from the requested tenor (the column is named after the tenor).
    risk_free_rate = rqdatac.get_yield_curve(reset_start_date, end_date,
                                             tenor=risk_free_rate_tenor, country='cn')
    risk_free_rate['Daily'] = pd.Series(np.power(1 + risk_free_rate[risk_free_rate_tenor], 1 / 365) - 1,
                                        index=risk_free_rate.index)

    # Calculate daily risk premium for each equity
    clean_period_prices_pct_change = clean_period_prices.pct_change()
    clean_period_excess_return = clean_period_prices_pct_change.subtract(risk_free_rate['Daily'], axis=0)

    # Wash out the ones in kick_out_list and renormalise the remaining weights
    clean_market_weight = market_weight.loc[clean_period_prices.columns.values]
    clean_market_weight = clean_market_weight.div(clean_market_weight.sum())

    # If no risk_aversion_coefficient is passed in, then
    # risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility
    if risk_aversion_coefficient is None:
        market_portfolio_return = np.dot(clean_period_prices_pct_change, clean_market_weight)
        # The first row of pct_change is NaN and is skipped.
        risk_aversion_coefficient = ((market_portfolio_return[1:].mean() - risk_free_rate["Daily"].mean()) /
                                     market_portfolio_return[1:].var())

    clean_period_excess_return_cov = clean_period_excess_return[1:].cov()
    equilibrium_return = np.multiply(np.dot(clean_period_excess_return_cov, clean_market_weight),
                                     risk_aversion_coefficient)

    # Inverse of the scaled covariance; needed by Idzorek's method and by the final combination.
    scaled_cov_inv = np.linalg.inv(np.multiply(excess_return_cov_uncertainty, clean_period_excess_return_cov))
    prior_term = np.dot(scaled_cov_inv, equilibrium_return)

    # Generate the investors_views_uncertainty matrix if none is passed in
    if investors_views_uncertainty is None:
        Omeg_diag = list()
        if confidence_of_views is None:
            # He and Litterman's (1999) method to generate the uncertainty diagonal matrix; no confidence level on
            # the views is needed.
            for i in range(investors_views_indicate_M.shape[0]):
                view_row = investors_views_indicate_M[i, :]
                temp = np.dot(np.dot(view_row, clean_period_excess_return_cov),
                              view_row.transpose()) * excess_return_cov_uncertainty
                Omeg_diag.append(temp.item(0))
        else:
            # Idzorek's (2002) method, users can specify their confidence level on each view.
            risk_cov_inv = np.linalg.inv(np.multiply(risk_aversion_coefficient, clean_period_excess_return_cov))
            # Upper bound should be consistent with the magnitude of return; abs() keeps the search interval valid
            # when the mean equilibrium return is negative.
            upper_bound = abs(equilibrium_return.mean()) * 100
            for i in range(len(investors_views)):
                view_row = investors_views_indicate_M[i, :]
                view_col = view_row.transpose()
                view_value = investors_views[i]
                view_outer = np.dot(view_col, view_row)

                part1 = excess_return_cov_uncertainty * np.dot(clean_period_excess_return_cov, view_col)
                part2 = 1 / (excess_return_cov_uncertainty *
                             np.dot(view_row, np.dot(clean_period_excess_return_cov, view_col)))
                part3 = view_value - np.dot(view_row, equilibrium_return)
                return_with_full_confidence = equilibrium_return + np.multiply(part2 * part3, part1)
                weights_with_full_confidence = np.dot(risk_cov_inv, return_with_full_confidence)
                temp1 = weights_with_full_confidence - clean_market_weight
                temp2 = np.multiply(confidence_of_views[i], np.absolute(view_col))
                tilt = np.multiply(temp1, temp2)
                # .values replaces DataFrame.as_matrix(), which no longer exists in current pandas.
                weights_with_partial_confidence = clean_market_weight.values + tilt

                def objective_fun(x):
                    # Distance between the target weights and the weights implied by view uncertainty x.
                    posterior_cov = np.linalg.inv(scaled_cov_inv + np.multiply(np.reciprocal(x), view_outer))
                    posterior_term = prior_term + np.multiply(view_value * np.reciprocal(x), view_col)
                    wk = np.dot(risk_cov_inv, np.dot(posterior_cov, posterior_term))
                    return np.linalg.norm(np.subtract(weights_with_partial_confidence, wk))

                omega_k = sc_opt.minimize_scalar(objective_fun, bounds=(10**-8, upper_bound),
                                                 method="bounded", options={"xatol": 10**-8})
                Omeg_diag.append(float(np.ravel(omega_k.x)[0]))
        investors_views_uncertainty = np.diag(Omeg_diag)

    # Combine all the information above to get the distribution of expected return with given views
    views_precision = np.dot(investors_views_indicate_M.transpose(),
                             np.linalg.inv(investors_views_uncertainty))
    combined_return_covar = np.linalg.inv(scaled_cov_inv + np.dot(views_precision, investors_views_indicate_M))
    combined_return_mean = np.dot(combined_return_covar,
                                  prior_term + np.dot(views_precision, investors_views))

    return combined_return_mean, combined_return_covar, risk_aversion_coefficient, investors_views_uncertainty
import math
import pandas as pd
import numpy as np
import datetime
import rqdatac as rq
from rqdatac import *

# Connect to the rqdatac data service.
# NOTE(review): the account credentials and server address are hard-coded here - consider moving them to
# configuration outside the source tree.
rq.init("ricequant", "8ricequant8", ('10.29.135.119', 16010))

# Parameters
inputPath = "E:/中泰证券/策略/潜伏业绩预增策略/每日跟踪调整202002/结果_10/"
outputPath = "E:/中泰证券/策略/潜伏业绩预增策略/每日跟踪调整202002/结果_10/"

start_date = "2020-01-01"
# The end date is the trading day preceding "now"; the alternative below would use today's calendar date.
# end_date = datetime.datetime.now().strftime('%Y-%m-%d')
end_date = rq.get_previous_trading_date(
    datetime.datetime.now(), 1
).strftime('%Y-%m-%d')

hold_length = 10
tax_cost = 0.001
tran_cost = 0.002
unit_amount = 1e6
index_code = '000905.XSHG'
index_multiplier = 200

# Data import: load the buy/sell records, drop exact duplicates, order them by buy date and renumber the rows.
df_buy_sell = (
    pd.read_csv(inputPath + "汇总个股买卖时点.csv", index_col=0, engine='python')
    .drop_duplicates()
    .sort_values(by='buy_date', axis=0, ascending=True)
    .reset_index(drop=True)
)
def _benchmark_factor_return(index_code, previous_trading_date, factor_exposure, industry_factors,
                             market_cap, daily_excess_return, normalized_regression_weight):
    """
    Estimate factor returns inside the stock pool of one benchmark index.

    :param index_code: str. order_book_id of the index whose components form the stock pool.
    :param previous_trading_date: date the index components are looked up for.
    :param factor_exposure: pandas DataFrame. Factor exposures, indexed by order_book_id.
    :param industry_factors: str list. Names of the industry columns in factor_exposure.
    :param market_cap: pandas Series. Market cap per stock.
    :param daily_excess_return: pandas DataFrame. Single-row frame of daily excess returns, one column per stock.
    :param normalized_regression_weight: pandas Series. Regression weight per stock.
    :return: result of constrainted_weighted_least_square for this stock pool.
    """
    components = rqdatac.index_components(index_name=index_code, date=previous_trading_date)
    components = list(set(market_cap.index.tolist()).intersection(set(components)))

    # Total market cap per industry, used for the industry return constraint
    industry_total_market_cap = market_cap[components].dot(
        factor_exposure[industry_factors].loc[components])
    # An industry whose total market cap is below 100 is treated as not held by the benchmark
    missing_industry = industry_total_market_cap[industry_total_market_cap < 100].index
    industry_total_market_cap = industry_total_market_cap.drop(missing_industry)

    # Re-orthogonalise the non-linear size exposure against the size exposure within this stock pool.
    # .copy() makes it explicit that the caller's factor_exposure is not modified.
    pool_factor_exposure = factor_exposure.loc[components].copy()
    pool_factor_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(pool_factor_exposure['size'], 3),
        reference_variable=pool_factor_exposure['size'],
        regression_weight=np.sqrt(market_cap[components]) / (np.sqrt(market_cap[components]).sum()))

    return constrainted_weighted_least_square(
        Y=daily_excess_return[factor_exposure.index][components].values[0],
        X=pool_factor_exposure.drop(missing_industry, axis=1),
        weight=normalized_regression_weight[factor_exposure.index][components],
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))


def factor_return_estimation(latest_trading_date, factor_exposure):
    """
    Estimate daily factor returns for the whole market and for the CSI 300, CSI 500 and CSI 800 stock pools.

    :param latest_trading_date: date the returns are measured on.
    :param factor_exposure: pandas DataFrame. Factor exposures as of the previous trading day, indexed by
        order_book_id; must contain the industry columns as well as 'size' and 'non_linear_size'.
    :return: pandas DataFrame with the columns 'whole_market', 'csi_300', 'csi_500' and 'csi_800'; missing values
        are replaced by 0.
    """
    previous_trading_date = rqdatac.get_previous_trading_date(latest_trading_date)

    # Daily excess return: close-to-close return minus the de-annualised 3M yield
    daily_return = rqdatac.get_price(order_book_ids=factor_exposure.index.tolist(),
                                     start_date=previous_trading_date,
                                     end_date=latest_trading_date,
                                     fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=latest_trading_date,
                                                          end_date=latest_trading_date,
                                                          tenor='3M')['3M']
    daily_risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least squares regression
    market_cap = rqdatac.get_factor(id_or_symbols=factor_exposure.index.tolist(),
                                    factor='a_share_market_val',
                                    start_date=previous_trading_date,
                                    end_date=previous_trading_date)
    # Missing market caps are rebuilt as close price * total A shares
    missing_mask = market_cap.isnull()
    missing_market_cap_stock = market_cap[missing_mask].index.tolist()
    if len(missing_market_cap_stock) > 0:
        price = rqdatac.get_price(missing_market_cap_stock, previous_trading_date, previous_trading_date,
                                  fields='close', frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock, previous_trading_date, previous_trading_date,
                                    fields='total_a').T
        market_cap[missing_mask] = (price * shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(0.5).sum()

    # The industry classification in use depends on the date
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = ['农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料', '纺织服装', '轻工制造',
                            '医药生物', '公用事业', '交通运输', '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰', '电气设备',
                            '国防军工', '计算机', '传媒', '通信', '银行', '非银金融', '汽车', '机械设备']
    else:
        industry_factors = ['金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器', '交运设备',
                            '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
                            '商业贸易', '化工', '信息服务', '采掘', '黑色金属']

    # Total market cap per industry, used for the industry return constraint
    industry_total_market_cap = market_cap.dot(factor_exposure.loc[market_cap.index][industry_factors])

    factor_return_series = pd.DataFrame()

    # The 10 style factors are left unconstrained, the industry factors are constrained
    factor_return_series['whole_market'] = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    # CSI 300, CSI 500 and CSI 800 stock pools share the same procedure
    for column, index_code in (('csi_300', '000300.XSHG'),
                               ('csi_500', '000905.XSHG'),
                               ('csi_800', '000906.XSHG')):
        factor_return_series[column] = _benchmark_factor_return(
            index_code, previous_trading_date, factor_exposure, industry_factors,
            market_cap, daily_excess_return, normalized_regression_weight)

    # If an index holds no stock of a given industry, that factor return is 0
    return factor_return_series.replace(np.nan, 0)
def factor_return_estimation(date, factor_exposure, industry_factors):
    """
    Estimate whole-market daily factor returns with a constrained weighted least squares regression.

    :param date: str, "%Y-%m-%d". The most recent trading day on or before this date is used.
    :param factor_exposure: pandas DataFrame. Factor exposures as of the previous trading day, indexed by
        order_book_id.
    :param industry_factors: str list. Names of the industry columns in factor_exposure.
    :return: result of constrainted_weighted_least_square with missing values replaced by 0.
    :raise ValueError: if 30 or more market caps are missing from both data sources.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(latest_trading_date)

    # Daily excess return: close-to-close return minus the de-annualised 3M yield
    daily_return = rqdatac.get_price(order_book_ids=factor_exposure.index.tolist(),
                                     start_date=previous_trading_date,
                                     end_date=latest_trading_date,
                                     fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=latest_trading_date,
                                                          end_date=latest_trading_date,
                                                          tenor='3M')['3M']
    daily_risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least squares regression
    market_cap = rqdatac.get_factor(id_or_symbols=factor_exposure.index.tolist(),
                                    factor='a_share_market_val',
                                    start_date=previous_trading_date,
                                    end_date=previous_trading_date)

    if market_cap.isnull().sum() >= 30:
        # Too many gaps in the factor data: fall back to the fundamentals table
        market_cap = rqdatac.get_fundamentals(
            rqdatac.query(rqdatac.fundamentals.eod_derivative_indicator.a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)['a_share_market_val'].loc[factor_exposure.index]
        if market_cap.isnull().sum() >= 30:
            raise ValueError('市值出现大量缺失')

    # Stocks still lacking a market cap are excluded whichever source was used; a NaN left in place would turn
    # the industry totals below into NaN.
    market_cap = market_cap.dropna()

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(0.5).sum()

    # Total market cap per industry, used for the industry return constraint
    industry_total_market_cap = market_cap.dot(factor_exposure.loc[market_cap.index][industry_factors])

    # The 10 style factors are left unconstrained, the industry factors are constrained
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    return factor_return_series.replace(np.nan, 0)
# Wildcard的exclude列表为空,即对所有风格/行业设置相同的约束,其中使用中信行业分类 cons = [ rqoptimizer2.WildcardIndustryConstraint( lower_limit=-0.01, upper_limit=0.01, relative=True, hard=False, classification=rqoptimizer2.IndustryClassification.ZX), rqoptimizer2.WildcardStyleConstraint(lower_limit=-0.3, upper_limit=0.3, relative=True, hard=False) ] # 获取前一交易日中证800成分股的净利润增长率(TTM) previous_date = rqdatac.get_previous_trading_date(date) index_component = rqdatac.index_components('000906.XSHG', previous_date) indicator_series = rqdatac.get_factor(index_component, 'net_profit_growth_ratio_ttm', previous_date, previous_date).dropna() selected_stock = generate_stock_pool(previous_date, indicator_series, stock_number=5) # 个股指标得分范围调整至0.1-1.1,避免权重过分集中于部分指标得分较大的个股 adjusted_series = ((indicator_series.loc[selected_stock] - indicator_series.loc[selected_stock].min()) / (indicator_series.loc[selected_stock].max() - indicator_series.loc[selected_stock].min())) + 0.1 portfolio_weight = rqoptimizer2.portfolio_optimize( selected_stock, date,
'password': '******', 'host': 'sh-cdb-oarey71m.sql.tencentcdb.com', 'port': 60993, 'database': 'jarvis' } engine = create_engine( 'mysql+pymysql://%(user)s:%(password)s@%(host)s:%(port)s/%(database)s' % dbinfo) conn = engine.connect() return conn w.start() conn = _local_conn() date_end = rq.get_previous_trading_date(datetime.today().date(), 1) date_start = rq.get_previous_trading_date(date_end, 20) date = date_end.strftime('%Y-%m-%d') sql = f''' SELECT * FROM jarvis.lgt_inst_north_cap_em JOIN jarvis.lgt_inst_info_em USING (inst_code) WHERE as_of_date = '{date_end}' ORDER by stock_cap DESC ''' info = pd.read_sql(sql, conn) inst_code = tuple(info[:20]['inst_code']) inst_code_all = tuple(info['inst_code']) sql = f'''
def get_style_factors(date):
    """Assemble the six style-factor exposures for one calculation date.

    :param date: calculation date as a ``'%Y-%m-%d'`` string
    :return: result of concatenating the six exposures column-wise, with
        columns renamed to ``size``, ``non_linear_size``, ``beta``,
        ``residual_volatility``, ``momentum`` and ``liquidity``
    """
    # NOTE(review): shifting by one day before asking for the "previous"
    # trading date presumably resolves to `date` itself when it is a trading
    # day -- confirm against the rqdatac calendar semantics.
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = rqdatac.get_previous_trading_date(day_after)

    # A 500-calendar-day look-back is fetched so that the 252nd trading day
    # counted from the end can be picked out of the returned calendar.
    calendar = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=500),
        latest_trading_date,
        country='cn')
    trading_date_252_before = calendar[-252]

    instruments = rqdatac.all_instruments(type='CS', date=latest_trading_date)
    stock_list = instruments['order_book_id'].values.tolist()

    window_start = trading_date_252_before.strftime('%Y-%m-%d')
    window_end = latest_trading_date.strftime('%Y-%m-%d')
    stock_excess_return, market_portfolio_excess_return = \
        get_daily_excess_return(stock_list, window_start, window_end)

    # Market cap is requested only for the stocks that came back with returns.
    market_cap_on_current_day = rqdatac.get_factor(
        id_or_symbols=stock_excess_return.columns.tolist(),
        factor='market_cap',
        start_date=latest_trading_date.strftime('%Y-%m-%d'),
        end_date=latest_trading_date.strftime('%Y-%m-%d'))

    size_exposure = size(market_cap_on_current_day)
    non_linear_size_exposure = non_linear_size(size_exposure,
                                               market_cap_on_current_day)

    market_portfolio_beta, market_portfolio_beta_exposure = \
        get_market_portfolio_beta(stock_excess_return,
                                  market_portfolio_excess_return,
                                  market_cap_on_current_day)

    # Residual volatility is a fixed-weight blend of three descriptors.
    daily_standard_deviation_exposure = get_daily_standard_deviation(
        stock_excess_return, market_cap_on_current_day)
    cumulative_range_exposure = get_cumulative_range(
        stock_list, latest_trading_date, market_cap_on_current_day)
    historical_sigma_exposure = get_historical_sigma(
        stock_excess_return,
        market_portfolio_excess_return,
        market_portfolio_beta,
        market_portfolio_beta_exposure,
        market_cap_on_current_day)
    residual_volatility_exposure = (
        0.74 * daily_standard_deviation_exposure
        + 0.16 * cumulative_range_exposure
        + 0.1 * historical_sigma_exposure)

    momentum_exposure = get_momentum(stock_list, latest_trading_date,
                                     market_cap_on_current_day)
    liquidity_exposure = get_liquidity(stock_list, latest_trading_date,
                                       market_cap_on_current_day)

    # Keep each output column name next to the exposure that feeds it.
    named_exposures = [
        ('size', size_exposure),
        ('non_linear_size', non_linear_size_exposure),
        ('beta', market_portfolio_beta_exposure),
        ('residual_volatility', residual_volatility_exposure),
        ('momentum', momentum_exposure),
        ('liquidity', liquidity_exposure),
    ]
    style_factors = pd.concat(
        [exposure for _, exposure in named_exposures], axis=1)
    style_factors.columns = [name for name, _ in named_exposures]
    return style_factors
def get_explicit_factor_returns(date, stock_list):
    """Compute explicit (long-minus-short) returns for every style factor.

    Exposures are read for the trading day preceding ``date``; the stock
    return used is the close-to-close change ending on that same day.
    Stocks are first bucketed into a 3x3 grid on their ``size`` and ``beta``
    exposures.  Within every bucket each factor is split at its median; the
    factor return is the mean return of all above-median stocks minus the
    mean return of all at-or-below-median stocks.

    :param date: calculation date, passed to
        ``rqdatac.get_previous_trading_date``
    :param stock_list: list of order_book_ids forming the stock pool
    :return: pandas.Series with one value per exposure column
    """
    previous_trading_date = rqdatac.get_previous_trading_date(date)

    factor_exposures = rqdatac.get_style_factor_exposure(
        stock_list, previous_trading_date, previous_trading_date,
        "all").sort_index()
    # Only one date is requested, so the second index level is dropped to
    # leave a plain per-stock index.
    factor_exposures.index = factor_exposures.index.droplevel(1)

    price_change = rqdatac.get_price(
        stock_list,
        rqdatac.get_previous_trading_date(previous_trading_date),
        previous_trading_date,
        fields="close").pct_change().iloc[-1]

    def _split_at_median(series):
        # Return (at-or-below-median ids, above-median ids).  NaN exposures
        # fail both comparisons and therefore land in neither half.
        median = series.median()
        return (series[series <= median].index.tolist(),
                series[series > median].index.tolist())

    def _calc_single_explicit_returns(_factor_exposure,
                                      market_neutralize_stocks):
        # Return of one factor.  The halves are merged into plain sets rather
        # than routed through np.array(...).flatten(): ragged halves raise on
        # recent NumPy, and equal-length halves collapse into a 3-D array
        # whose flattening would mix up the long and short legs.
        short_stocks = set()
        long_stocks = set()
        for bucket in market_neutralize_stocks:
            lower_half, upper_half = _split_at_median(_factor_exposure[bucket])
            short_stocks.update(lower_half)
            long_stocks.update(upper_half)
        # Sorted so the averaging order is reproducible between runs.
        return (price_change[sorted(long_stocks)].mean()
                - price_change[sorted(short_stocks)].mean())

    # Bucket the pool into size/beta terciles (labels 1..3 on each axis).
    size_beta = factor_exposures[['size', 'beta']].loc[stock_list]
    quantile_group = size_beta.apply(
        lambda x: pd.cut(x, bins=3, labels=False) + 1).reset_index()
    # After reset_index the index is positional, so each group is a list of
    # positions into stock_list.
    quantile_positions = quantile_group.groupby(
        ['size', 'beta']).apply(lambda x: x.index.tolist())
    pool = pd.Series(stock_list)
    market_neutralize_stocks = [
        pool.loc[positions].values.tolist()
        for positions in quantile_positions.values.tolist()
    ]

    return factor_exposures.loc[stock_list].apply(
        lambda x: _calc_single_explicit_returns(x, market_neutralize_stocks))
'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV', 'CNE5S_FOODPROD', 'CNE5S_HEALTH', 'CNE5S_BANKS', 'CNE5S_DVFININS', 'CNE5S_REALEST', 'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE' ] style_factors = [ 'CNE5S_BETA', 'CNE5S_MOMENTUM', 'CNE5S_SIZE', 'CNE5S_EARNYILD', 'CNE5S_RESVOL', 'CNE5S_GROWTH', 'CNE5S_BTOP', 'CNE5S_LEVERAGE', 'CNE5S_LIQUIDTY', 'CNE5S_SIZENL' ] country_factor = ['CNE5S_COUNTRY'] all_factors = industry_factors + style_factors + country_factor latest_trading_date = rqdatac.get_previous_trading_date( (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))) current_factor_return, multiperiod_factor_returns = get_multiperiod_factor_returns( all_factors, latest_trading_date, shortTermParameters) unadjusted_covariance_df = pd.DataFrame(index=all_factors, columns=all_factors) pre_volatility_covariance_df = pd.DataFrame(index=all_factors, columns=all_factors) fully_processed_covariance_df = pd.DataFrame(index=all_factors, columns=all_factors) for factor in all_factors: for factors in all_factors:
def get_style_factors(date):
    """Assemble the ten style-factor exposures for one calculation date.

    :param date: calculation date as a ``'%Y-%m-%d'`` string
    :return: result of concatenating the ten exposures column-wise, with
        columns renamed to ``size``, ``non_linear_size``, ``beta``,
        ``residual_volatility``, ``momentum``, ``liquidity``,
        ``earnings_yield``, ``book_to_price``, ``leverage`` and ``growth``
    """
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = rqdatac.get_previous_trading_date(day_after)

    # A 500-calendar-day look-back is fetched so that the 252nd trading day
    # counted from the end can be picked out of the returned calendar.
    calendar = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=500),
        latest_trading_date,
        country='cn')
    trading_date_252_before = calendar[-252]

    instruments = rqdatac.all_instruments(type='CS', date=latest_trading_date)
    stock_list = instruments['order_book_id'].values.tolist()

    stock_excess_return, market_portfolio_excess_return = \
        get_daily_excess_return(
            stock_list,
            trading_date_252_before.strftime('%Y-%m-%d'),
            latest_trading_date.strftime('%Y-%m-%d'))

    # The same formatted date string is reused by every fundamental
    # descriptor below.
    latest_str = latest_trading_date.strftime('%Y-%m-%d')

    market_cap_on_current_day = rqdatac.get_factor(
        id_or_symbols=stock_excess_return.columns.tolist(),
        factor='a_share_market_val',
        start_date=latest_str,
        end_date=latest_str)

    # --- size -------------------------------------------------------------
    size_exposure = size(market_cap_on_current_day)
    non_linear_size_exposure = non_linear_size(size_exposure,
                                               market_cap_on_current_day)

    # --- beta -------------------------------------------------------------
    market_portfolio_beta, market_portfolio_beta_exposure = \
        get_market_portfolio_beta(stock_excess_return,
                                  market_portfolio_excess_return,
                                  market_cap_on_current_day)

    # --- residual volatility: blend, orthogonalize to beta, standardize ----
    daily_standard_deviation_exposure = get_daily_standard_deviation(
        stock_excess_return, market_cap_on_current_day)
    cumulative_range_exposure = get_cumulative_range(
        stock_list, latest_trading_date, market_cap_on_current_day)
    historical_sigma_exposure = get_historical_sigma(
        stock_excess_return,
        market_portfolio_excess_return,
        market_portfolio_beta,
        market_portfolio_beta_exposure,
        market_cap_on_current_day)
    blended_volatility = (
        0.74 * daily_standard_deviation_exposure
        + 0.16 * cumulative_range_exposure
        + 0.1 * historical_sigma_exposure)

    # Regression weights: square-root market cap normalized to sum to one.
    root_cap = np.sqrt(market_cap_on_current_day)
    sqrt_cap_weight = root_cap / (root_cap.sum())
    orthogonalized_residual_volatility_exposure = orthogonalize(
        target_variable=blended_volatility,
        reference_variable=market_portfolio_beta_exposure,
        regression_weight=sqrt_cap_weight)
    residual_volatility_exposure = \
        winsorization_and_market_cap_weighed_standardization(
            orthogonalized_residual_volatility_exposure,
            market_cap_on_current_day)

    # --- momentum / liquidity ----------------------------------------------
    momentum_exposure = get_momentum(stock_list, latest_trading_date,
                                     market_cap_on_current_day)
    liquidity_exposure = get_liquidity(stock_list, latest_trading_date,
                                       market_cap_on_current_day)

    # --- earnings yield ------------------------------------------------------
    earnings_to_price = get_earning_to_price_ratio(
        latest_str, market_cap_on_current_day)
    cash_earnings_to_price = get_cash_earnings_to_price_ratio(
        latest_str, market_cap_on_current_day)
    raw_earnings_yield = (earnings_to_price * (11 / 32)
                          + cash_earnings_to_price * (21 / 32))
    earnings_yield = winsorization_and_market_cap_weighed_standardization(
        raw_earnings_yield, market_cap_on_current_day)

    # --- book to price -------------------------------------------------------
    book_to_price = book_to_price_ratio(latest_str, market_cap_on_current_day)

    # --- leverage ------------------------------------------------------------
    market_leverage = get_market_leverage(latest_str,
                                          market_cap_on_current_day)
    debt_to_asset = get_debt_to_asset(latest_str, market_cap_on_current_day)
    book_leverage = get_book_leverage(latest_str, market_cap_on_current_day)
    raw_leverage = (market_leverage * 0.38 + debt_to_asset * 0.35
                    + book_leverage * 0.27)
    leverage = winsorization_and_market_cap_weighed_standardization(
        raw_leverage, market_cap_on_current_day)

    # --- growth --------------------------------------------------------------
    # NOTE(review): `year` is not bound anywhere inside this function, so it
    # must be supplied by the enclosing module scope; if it is not, the next
    # call raises NameError -- confirm where it is meant to come from.
    sales_growth = get_sales_growth(latest_str, year,
                                    market_cap_on_current_day)
    earnings_growth = get_earnings_growth(latest_str, year,
                                          market_cap_on_current_day)
    raw_growth = sales_growth * (47 / 71) + earnings_growth * (24 / 71)
    growth = winsorization_and_market_cap_weighed_standardization(
        raw_growth, market_cap_on_current_day)

    # Keep each output column name next to the exposure that feeds it.
    named_exposures = [
        ('size', size_exposure),
        ('non_linear_size', non_linear_size_exposure),
        ('beta', market_portfolio_beta_exposure),
        ('residual_volatility', residual_volatility_exposure),
        ('momentum', momentum_exposure),
        ('liquidity', liquidity_exposure),
        ('earnings_yield', earnings_yield),
        ('book_to_price', book_to_price),
        ('leverage', leverage),
        ('growth', growth),
    ]
    style_factors = pd.concat(
        [exposure for _, exposure in named_exposures], axis=1)
    style_factors.columns = [name for name, _ in named_exposures]
    return style_factors
def customized_factor_return_estimation(date, factor_exposure, stock_list):
    """Estimate factor returns on a user-defined stock pool.

    Runs a constrained weighted least-squares regression of daily excess
    returns on factor exposures, restricted to ``stock_list``.  Size and
    non-linear size are recomputed inside the pool, the remaining style
    factors are re-standardized with the pool's market-cap-weighted mean, and
    industries whose total market cap inside the pool is below 100 are
    removed from the regression.

    :param date: calculation date as a ``'%Y-%m-%d'`` string
    :param factor_exposure: DataFrame indexed by order_book_id holding the
        style and industry exposure columns referenced below
    :param stock_list: order_book_ids of the customized pool
    :return: the series produced by ``constrainted_weighted_least_square``
        with NaN replaced by 0
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    exposure_ids = factor_exposure.index.tolist()

    # Daily excess return: last close-to-close change minus the daily
    # risk-free rate derived from the 3M yield (252 trading days per year).
    daily_return = rqdatac.get_price(
        order_book_ids=exposure_ids,
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return) ** (1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the WLS regression weight.
    market_cap = rqdatac.get_factor(
        id_or_symbols=exposure_ids,
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    # Back-fill missing market caps with close price x total A shares.
    missing_mask = market_cap.isnull()
    missing_market_cap_stock = market_cap[missing_mask].index.tolist()
    if missing_market_cap_stock:
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T
        market_cap[missing_mask] = (price * shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # The industry column names differ before and after 2014-01-01.
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业',
            '交通运输', '房地产', '商业贸易', '休闲服务', '综合', '建筑材料',
            '建筑装饰', '电气设备', '国防军工', '计算机', '传媒', '通信',
            '银行', '非银金融', '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装',
            '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    style_factor = [
        'beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth',
        'book_to_price', 'leverage', 'liquidity'
    ]

    # Restrict the pool to stocks that have a market cap; sorted so the row
    # order of the regression inputs is reproducible between runs.
    stock_list = sorted(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry inside the pool, used as the industry
    # return constraint.
    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # An industry whose total market cap is below 100 is treated as not held
    # by the benchmark and is excluded from the regression.
    missing_industry = customized_industry_total_market_cap[
        customized_industry_total_market_cap < 100].index
    csi_300_industry_total_market_cap = \
        customized_industry_total_market_cap.drop(missing_industry)

    # Recompute size and non-linear size exposures inside the pool.
    size_exposure = get_size(market_cap[stock_list])
    non_linear_size_exposure = get_non_linear_size(size_exposure,
                                                   market_cap[stock_list])

    # Remaining style factors: subtract the market-cap-weighted mean and
    # divide by the (unweighted) standard deviation.
    factors_exposure = factor_exposure.drop(missing_industry,
                                            axis=1).loc[stock_list]
    market_cap_mean = market_cap[stock_list].dot(
        factors_exposure[style_factor]) / market_cap[stock_list].sum()
    style_exposure = (factors_exposure[style_factor] -
                      market_cap_mean) / (factors_exposure[style_factor].std())

    # Join the recomputed size exposures onto the other style exposures.
    style_exposure = pd.concat(
        [style_exposure, size_exposure, non_linear_size_exposure], axis=1)
    style_exposure.columns = style_factor + ['size', 'non_linear_size']

    # Select only the industries that survived the drop above.  Indexing the
    # already-reduced frame with the full industry list raises KeyError on
    # current pandas as soon as one industry is missing.
    dropped = set(missing_industry)
    retained_industries = [
        name for name in industry_factors if name not in dropped
    ]
    factor_exposure = pd.concat(
        [style_exposure, factors_exposure[retained_industries]], axis=1)
    factor_exposure['comovement'] = 1

    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[factor_exposure.index][stock_list].values[0],
        X=factor_exposure,
        weight=normalized_regression_weight[factor_exposure.index][stock_list],
        industry_total_market_cap=csi_300_industry_total_market_cap,
        # 8 re-standardized style factors + size + non_linear_size.
        unconstrained_variables=10,
        constrained_variables=len(csi_300_industry_total_market_cap))

    # Any NaN factor return in the result is reported as 0.
    return factor_return_series.replace(np.nan, 0)
rq.init() count = 0 all_future_dataframe_columns = ['date', 'future_name', 'margin', 'open_interest', 'open_interest_price'] date_list = [] future_name_list = [] margin_list = [] open_interest_list = [] open_interest_price_list = [] DATA_PATH = "E:\\future_data\\all_data\\" NO_MAKER_DATA_PATH = "other_table\\" original_all_future_table = pd.read_csv(DATA_PATH + NO_MAKER_DATA_PATH + "all_future.csv") test1 = original_all_future_table['underlying_symbol'].drop_duplicates() # 获取当天的日期并转换为rq需要的格式 time_of_today = datetime.datetime.now().strftime("%Y%m%d") time_of_yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y%m%d") last_trading_day = rq.get_previous_trading_date(time_of_today, n=1) # date_list.append(last_trading_day) for temp_future_name in test1: # 获取截止当天为止每个期货品种的主力合约 temp_dominant_list = rq.get_dominant_future(temp_future_name, end_date=last_trading_day) print "当前合约为:" + temp_future_name # 最新的主力合约对应的具体合约 newest_domiant_contract = pd.DataFrame(temp_dominant_list) if 'dominant' in newest_domiant_contract.columns: if not newest_domiant_contract['dominant'].empty: newest_domiant_contract = pd.DataFrame(temp_dominant_list)['dominant'].unique()[-1] else: continue else: continue # 最新具体合约的当天收盘价
def get_customized_factor_return(universe,
                                 date,
                                 skip_suspended=True,
                                 skip_st_stocks=True,
                                 method='implicit'):
    """Compute factor returns on a user-supplied stock universe.

    PARAMETERS
    ----------
    universe: list
        User-defined stock whitelist given as order_book_ids, for example
        ['600705.XSHG', '601555.XSHG'].  The stocks should already be listed
        on the calculation date.

    date: str
        Calculation date in '%Y-%m-%d' form (for example '2017-03-03').

    skip_suspended: boolean
        Whether to drop stocks with zero trading volume on the calculation
        day.  Defaults to True.

    skip_st_stocks: boolean
        Whether to drop ST stocks.  Defaults to True.

    method: str
        'implicit' (default) runs the regression-based estimation; any other
        value falls through to the explicit style-factor returns.

    RETURN
    ----------
    factor_return: Series
        Factor (or style-factor) returns computed on the filtered universe,
        indexed by factor name.
    """
    latest_trading_date = str(
        rqdatac.get_previous_trading_date(
            datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # The filters below are keyed on the resolved trading day rather than the
    # raw `date`: the lookups `.loc[...]` need a row for the requested day,
    # which the raw `date` cannot provide when it is not a trading day.

    # Optionally drop ST stocks.
    if skip_st_stocks:
        is_st_df = rqdatac.is_st_stock(universe,
                                       start_date=latest_trading_date,
                                       end_date=latest_trading_date)
        is_st_df.index = is_st_df.index.astype(str)
        st_flags = is_st_df.loc[latest_trading_date]
        # Element-wise comparison on the underlying array; `not` would not
        # vectorize here.
        universe = st_flags[st_flags.values == False].index.tolist()  # noqa: E712

    # Optionally drop suspended stocks (no volume traded on the day).
    if skip_suspended:
        trading_volume = rqdatac.get_price(universe,
                                           start_date=latest_trading_date,
                                           end_date=latest_trading_date,
                                           frequency='1d',
                                           fields='volume',
                                           country='cn')
        day_volume = trading_volume.loc[latest_trading_date]
        universe = day_volume[day_volume.values > 0].index.tolist()

    # Exposures of the filtered universe as of the previous trading day.
    factor_exposure = get_exposure(universe, str(previous_trading_date))

    # Both estimators receive the original `date`; they resolve the trading
    # days they need themselves.
    if method == 'implicit':
        factor_return = customized_factor_return_estimation(
            date, factor_exposure, universe)
    else:
        factor_return = get_explicit_factor_returns(date, universe)

    return factor_return