def get_cash_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                     market_cap_on_current_day):
    """Return the standardised cash-earnings-to-price descriptor.

    The raw ratio is the value returned by ``get_ttm_sum`` for the
    operating cash flow item divided by (unadjusted close * total shares)
    on ``latest_trading_date``.

    :param latest_trading_date: date used for both the price and the share
        query; ``str(latest_trading_date)`` must be a column label of the
        transposed price/share frames.
    :param recent_report_type: forwarded unchanged to ``get_ttm_sum``.
    :param market_cap_on_current_day: market caps indexed by stock; only the
        entries for stocks present in the raw ratio are forwarded.
    :return: output of
        ``winsorization_and_market_cap_weighed_standardization``.
    """
    operating_cash_flow_ttm = get_ttm_sum(
        rqdatac.financials.cash_flow_statement.
        cash_flow_from_operating_activities, recent_report_type)
    universe = operating_cash_flow_ttm.index.tolist()

    # Price and shares are both requested for the single trading date.
    single_day = dict(start_date=latest_trading_date,
                      end_date=latest_trading_date)
    # Transposed so that stocks become the row index
    # (assumes rqdatac returns dates as rows -- TODO confirm).
    close_by_stock = rqdatac.get_price(
        universe, fields='close', adjust_type='none', **single_day).T
    total_shares_by_stock = rqdatac.get_shares(
        universe, fields='total', **single_day).T

    market_value = (close_by_stock *
                    total_shares_by_stock)[str(latest_trading_date)]
    raw_ratio = operating_cash_flow_ttm / market_value

    return winsorization_and_market_cap_weighed_standardization(
        raw_ratio, market_cap_on_current_day[raw_ratio.index])
def get_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                market_cap_on_current_day):
    """Return the standardised earnings-to-price descriptor.

    The raw ratio is the value returned by ``get_ttm_sum`` for the
    ``profit_before_tax`` income-statement item divided by
    (unadjusted close * total shares) on ``latest_trading_date``.

    :param latest_trading_date: date used for both the price and the share
        query; ``str(latest_trading_date)`` must be a column label of the
        transposed price/share frames.
    :param recent_report_type: forwarded unchanged to ``get_ttm_sum``.
    :param market_cap_on_current_day: market caps indexed by stock; only the
        entries for stocks present in the raw ratio are forwarded.
    :return: output of
        ``winsorization_and_market_cap_weighed_standardization``.
    """
    # NOTE(review): the item queried is profit *before tax*, although the
    # descriptor is an earnings (net profit) ratio -- confirm this is intended.
    earnings_ttm = get_ttm_sum(
        rqdatac.financials.income_statement.profit_before_tax,
        recent_report_type)
    universe = earnings_ttm.index.tolist()

    # Price and shares are both requested for the single trading date.
    single_day = dict(start_date=latest_trading_date,
                      end_date=latest_trading_date)
    # Transposed so that stocks become the row index
    # (assumes rqdatac returns dates as rows -- TODO confirm).
    close_by_stock = rqdatac.get_price(
        universe, fields='close', adjust_type='none', **single_day).T
    total_shares_by_stock = rqdatac.get_shares(
        universe, fields='total', **single_day).T

    market_value = (close_by_stock *
                    total_shares_by_stock)[str(latest_trading_date)]
    raw_ratio = earnings_ttm / market_value

    return winsorization_and_market_cap_weighed_standardization(
        raw_ratio, market_cap_on_current_day[raw_ratio.index])
def get_shares(
        order_book_ids,  # type: Union[str, List[str]]
        count=1,  # type: Optional[int]
        fields=None,  # type: Optional[str]
        expect_df=False  # type: Optional[bool]
):
    # type: (...) -> Union[pd.DataFrame, pd.Series]
    """
    Query share-capital data ending at the current trading datetime.

    :param order_book_ids: accepts an order_book_id, a list of order_book_ids,
        a symbol, or a list of symbols
    :param count: number of data points to look back. Defaults to the most
        recent data currently available
    :param fields: fields to return, all fields by default. See the list below
    :param expect_df: whether a DataFrame should always be returned. With
        pandas 0.25.0 and above this should be set to True to avoid the
        exception raised when trying to build a Panel

    =========================   ===================================================
    fields                      field name
    =========================   ===================================================
    total                       total share capital
    circulation_a               tradable A shares
    management_circulation      tradable shares held by management
    non_circulation_a           total non-tradable A shares
    total_a                     total A-share capital
    =========================   ===================================================

    :return: share structure of the stock(s) over the queried period; a
        pandas.Series is returned when expect_df is False and fields is a
        single field

    :example:

    Get the total share capital of Ping An Bank:

    ..  code-block:: python3
        :linenos:

        logger.info(get_shares('000001.XSHE', count=5, fields='total'))
        #[Out]
        #2016-08-01    1.717041e+10
        #2016-08-02    1.717041e+10
        #2016-08-03    1.717041e+10
        #2016-08-04    1.717041e+10
        #2016-08-05    1.717041e+10
        #Name: total, dtype: float64
    """
    environment = Environment.get_instance()
    end_dt = environment.trading_dt

    # count == 1 means "today only"; otherwise step back count - 1 trading days.
    begin_dt = end_dt if count == 1 else (
        environment.data_proxy.get_previous_trading_date(end_dt, count - 1))

    # Normalise a single id or every id of an iterable.
    if isinstance(order_book_ids, six.string_types):
        normalised_ids = assure_order_book_id(order_book_ids)
    else:
        normalised_ids = list(map(assure_order_book_id, order_book_ids))

    return rqdatac.get_shares(normalised_ids, begin_dt, end_dt,
                              fields=fields, expect_df=expect_df)
def market_cap_imputation(stock_list, market_cap_on_current_day,
                          latest_trading_date):
    """Append market caps for stocks missing from ``market_cap_on_current_day``.

    For every stock in ``stock_list`` that has no entry in
    ``market_cap_on_current_day`` the market cap is rebuilt as
    unadjusted close * ``total_a`` shares on ``latest_trading_date``.
    Stocks for which that product is NaN fall back to the mean of the
    ``a_share_market_val`` factor over the window starting at the 22nd most
    recent trading date, and finally to the mean of
    ``market_cap_on_current_day``.

    :param stock_list: iterable of order_book_ids that need a market cap.
    :param market_cap_on_current_day: pandas Series of known market caps
        indexed by order_book_id.
    :param latest_trading_date: object supporting ``strftime`` and
        subtraction of a ``timedelta`` (e.g. ``datetime``).
    :return: ``market_cap_on_current_day`` followed by one entry per
        previously missing stock.
    """
    missing_market_cap_list = list(
        set(stock_list) - set(market_cap_on_current_day.index.tolist()))
    # Nothing to impute: do not query the data service with an empty list.
    if not missing_market_cap_list:
        return market_cap_on_current_day.copy()

    date_label = latest_trading_date.strftime('%Y-%m-%d')

    # Transposed so that stocks become the row index
    # (assumes rqdatac returns dates as rows -- TODO confirm).
    price_on_current_day = rqdatac.get_price(
        missing_market_cap_list,
        start_date=date_label,
        end_date=date_label,
        frequency='1d',
        fields='close',
        adjust_type='none').T
    shares_on_current_day = rqdatac.get_shares(
        missing_market_cap_list, date_label, date_label,
        fields='total_a').T

    market_cap = pd.Series(
        data=(price_on_current_day * shares_on_current_day)[date_label],
        index=missing_market_cap_list)

    if market_cap.isnull().any():
        missing_list = market_cap[market_cap.isnull()].index.tolist()
        trading_date_22_before = rqdatac.get_trading_dates(
            latest_trading_date - timedelta(days=50),
            latest_trading_date,
            country='cn')[-22]
        # presumably get_factor returns a date x stock frame, so .mean()
        # yields one value per stock -- verify against the rqdatac version.
        missing_market_cap = rqdatac.get_factor(
            id_or_symbols=missing_list,
            factor='a_share_market_val',
            start_date=trading_date_22_before.strftime('%Y-%m-%d'),
            end_date=date_label).mean().fillna(
                market_cap_on_current_day.mean())
        # Fill the NaN rows in place. Concatenating the fallback values onto
        # ``market_cap`` (which still holds the NaN rows) would leave every
        # imputed stock in the index twice: once as NaN, once with a value.
        market_cap = market_cap.fillna(missing_market_cap)

    return pd.concat([market_cap_on_current_day, market_cap])
def get_recent_five_annual_shares(stock_list, date):
    """Collect ``total_a`` shares around five consecutive May 1st dates.

    :param stock_list: order_book_ids forwarded to ``rqdatac.get_shares``.
    :param date: reference date as a ``'%Y-%m-%d'`` string.
    :return: DataFrame with columns ``first`` .. ``fifth`` (most recent year
        first), one column per yearly snapshot.
    """
    # Listed companies must publish their annual report before April 30 each
    # year, so the shares on the trading day returned by
    # rqdatac.get_next_trading_date for May 1st of each earlier year are used
    # as that year's share count.
    reference = datetime.strptime(date, '%Y-%m-%d')
    previous_year = reference.year - 1
    # From June onwards the window starts at the previous year, otherwise it
    # starts one year earlier.
    newest_year = previous_year if reference.month > 5 else previous_year - 1
    list_of_dates = [
        str(newest_year - years_back) + '-05-01' for years_back in range(5)
    ]

    recent_five_annual_shares = pd.DataFrame()
    for report_date in list_of_dates:
        snapshot_day = rqdatac.get_next_trading_date(report_date).strftime(
            '%Y-%m-%d')
        # Single-day query: keep its first (only) row.
        recent_five_annual_shares[report_date] = rqdatac.get_shares(
            stock_list,
            start_date=snapshot_day,
            end_date=snapshot_day,
            fields='total_a').iloc[0]

    # Rename the columns so the frame can be divided directly when computing
    # per-share revenue.
    recent_five_annual_shares.columns = [
        'first', 'second', 'third', 'fourth', 'fifth'
    ]
    return recent_five_annual_shares
def get_liquidity(stock_list, date, market_cap_on_current_day):
    """Return the standardised liquidity factor exposure.

    Daily turnover (volume / ``total_a`` shares) is summed over the last
    21, 63 and 252 rows, log-transformed, standardised, and combined with
    weights 0.35 / 0.35 / 0.3 before a final standardisation.

    :param stock_list: candidate order_book_ids; filtered through
        ``drop_suspended_stock`` before any data is requested.
    :param date: end date of the window; must support subtraction of a
        ``timedelta``.
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``.
    :return: output of the final standardisation call.
    """
    calendar = rqdatac.get_trading_dates(
        date - timedelta(days=500), date, country='cn')
    trading_date_252_before = calendar[-252]

    tradable = drop_suspended_stock(stock_list, date)
    volume = rqdatac.get_price(tradable, trading_date_252_before, date,
                               frequency='1d', fields='volume')
    shares = rqdatac.get_shares(tradable, trading_date_252_before, date,
                                fields='total_a')
    daily_turnover_rate = volume / shares

    # A turnover sum of 0 is turned into NaN before the log; the NaN
    # exposures are set to 0 when the descriptors are combined below.
    turnover_1m = daily_turnover_rate.iloc[-21:].sum().replace(0, np.nan)
    one_month_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_1m), market_cap_on_current_day))

    turnover_3m = daily_turnover_rate.iloc[-63:].sum().replace(0, np.nan)
    three_months_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_3m / 3), market_cap_on_current_day))

    turnover_12m = daily_turnover_rate.iloc[-252:].sum().replace(0, np.nan)
    twelve_months_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_12m / 12), market_cap_on_current_day))

    short_term = 0.35 * one_month_share_turnover.replace(np.nan, 0)
    medium_term = 0.35 * three_months_share_turnover.replace(np.nan, 0)
    long_term = 0.3 * twelve_months_share_turnover.replace(np.nan, 0)
    liquidity = short_term + medium_term + long_term

    return winsorization_and_market_cap_weighed_standardization(
        liquidity, market_cap_on_current_day)
def _estimate_benchmark_factor_return(index_code, previous_trading_date,
                                      factor_exposure, industry_factors,
                                      market_cap, daily_excess_return,
                                      normalized_regression_weight):
    """Run the constrained regression on the constituents of one index."""
    components = rqdatac.index_components(index_name=index_code,
                                          date=previous_trading_date)
    components = list(
        set(market_cap.index.tolist()).intersection(set(components)))

    # Total market cap per industry, used in the industry-return constraint.
    industry_total_market_cap = market_cap[components].dot(
        factor_exposure[industry_factors].loc[components])

    # An industry whose total market cap is below 100 is treated as not
    # held by the benchmark at all.
    missing_industry = industry_total_market_cap[
        industry_total_market_cap < 100].index
    industry_total_market_cap = industry_total_market_cap.drop(
        missing_industry)

    # Within the index universe the non-linear size exposure is recomputed
    # from its definition: size cubed, orthogonalised against size.
    benchmark_exposure = factor_exposure.loc[components]
    root_market_cap = np.sqrt(market_cap[components])
    benchmark_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(benchmark_exposure['size'], 3),
        reference_variable=benchmark_exposure['size'],
        regression_weight=root_market_cap / root_market_cap.sum())

    return constrainted_weighted_least_square(
        Y=daily_excess_return[factor_exposure.index][components].values[0],
        X=benchmark_exposure.drop(missing_industry, axis=1),
        weight=normalized_regression_weight[factor_exposure.index][components],
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))


def factor_return_estimation(latest_trading_date, factor_exposure):
    """Estimate daily factor returns for the whole market and three indices.

    :param latest_trading_date: trading date whose returns are explained.
    :param factor_exposure: DataFrame indexed by order_book_id holding the
        style and industry exposure columns (must contain ``size`` and every
        name of the industry list selected below).
    :return: DataFrame with columns ``whole_market``, ``csi_300``,
        ``csi_500`` and ``csi_800``; NaN entries are replaced by 0.
    """
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Daily return of every stock minus the daily risk-free return.
    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return)**(1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)
    missing_market_cap_stock = market_cap[market_cap.isnull()].index.tolist()
    if missing_market_cap_stock:
        # Rebuild missing market caps as close * total_a shares.
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T
        market_cap[market_cap.isnull()] = (
            price * shares)[previous_trading_date]
    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Industry columns differ before and after the 2014 reclassification.
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
            '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
            '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
            '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
            '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    # Total market cap per industry, used in the industry-return constraint.
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    factor_return_series = pd.DataFrame()

    # The 10 style factors are unconstrained; the industry factors are
    # constrained (the original note called them "GICS 32 industries", the
    # count actually passed is len(industry_total_market_cap)).
    factor_return_series['whole_market'] = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    # CSI 300, CSI 500 and CSI 800, estimated in that order.
    for column, index_code in (('csi_300', '000300.XSHG'),
                               ('csi_500', '000905.XSHG'),
                               ('csi_800', '000906.XSHG')):
        factor_return_series[column] = _estimate_benchmark_factor_return(
            index_code, previous_trading_date, factor_exposure,
            industry_factors, market_cap, daily_excess_return,
            normalized_regression_weight)

    # If an index holds no stock of a given industry, that industry's factor
    # return is 0.
    return factor_return_series.replace(np.nan, 0)
def customized_factor_return_estimation(date, factor_exposure, stock_list):
    """Estimate daily factor returns within a user-supplied stock pool.

    Size and non-linear size exposures are recomputed inside the pool, the
    remaining style factors are re-standardised around their cap-weighted
    mean, and a constant ``comovement`` column is appended before the
    constrained weighted least squares regression.

    :param date: ``'%Y-%m-%d'`` string; the latest trading date on or before
        it is the day whose returns are explained.
    :param factor_exposure: DataFrame indexed by order_book_id holding the
        style and industry exposure columns.
    :param stock_list: order_book_ids defining the pool; intersected with the
        stocks that have a market cap.
    :return: result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Daily return of every stock minus the daily risk-free return.
    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return)**(1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)
    missing_market_cap_stock = market_cap[market_cap.isnull()].index.tolist()
    if missing_market_cap_stock:
        # Rebuild missing market caps as close * total_a shares.
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T
        market_cap[market_cap.isnull()] = (
            price * shares)[previous_trading_date]
    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Industry columns differ before and after the 2014 reclassification.
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
            '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
            '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
            '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
            '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]
    style_factor = [
        'beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth',
        'book_to_price', 'leverage', 'liquidity'
    ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used in the industry-return constraint.
    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # An industry whose total market cap is below 100 is treated as not
    # held by the pool at all.
    missing_industry = customized_industry_total_market_cap[
        customized_industry_total_market_cap < 100].index
    retained_industry_total_market_cap = (
        customized_industry_total_market_cap.drop(missing_industry))
    dropped = set(missing_industry)
    retained_industries = [
        name for name in industry_factors if name not in dropped
    ]

    # Recompute size and non-linear size exposures inside the pool.
    size_exposure = get_size(market_cap[stock_list])
    non_linear_size_exposure = get_non_linear_size(size_exposure,
                                                   market_cap[stock_list])

    # The other style factors are centred on their cap-weighted mean and
    # scaled by their standard deviation.
    factors_exposure = factor_exposure.drop(missing_industry,
                                            axis=1).loc[stock_list]
    market_cap_mean = market_cap[stock_list].dot(
        factors_exposure[style_factor]) / market_cap[stock_list].sum()
    style_exposure = (factors_exposure[style_factor] -
                      market_cap_mean) / (factors_exposure[style_factor].std())

    # Join the recomputed size exposures with the other style exposures.
    style_exposure = pd.concat(
        [style_exposure, size_exposure, non_linear_size_exposure], axis=1)
    style_exposure.columns = style_factor + ['size', 'non_linear_size']

    # Only the retained industries are selected: the dropped columns no
    # longer exist in ``factors_exposure``, so asking for the full industry
    # list would raise KeyError as soon as one industry is missing.
    regression_exposure = pd.concat(
        [style_exposure, factors_exposure[retained_industries]], axis=1)
    regression_exposure['comovement'] = 1

    # ``regression_exposure`` is already free of the missing industries, so
    # it is passed as-is (dropping them again would raise KeyError).
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[regression_exposure.index][stock_list].values[0],
        X=regression_exposure,
        weight=normalized_regression_weight[
            regression_exposure.index][stock_list],
        industry_total_market_cap=retained_industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(retained_industry_total_market_cap))

    # If the pool holds no stock of a given industry, that industry's factor
    # return is 0.
    return factor_return_series.replace(np.nan, 0)
def _annual_sales_per_share(stock, query, report_quarter):
    """Return revenue / total shares of ``stock`` for one annual report."""
    sales = rqdatac.get_financials(query, report_quarter, '1q')
    # Shares are taken on the last trading date of the report's year
    # (the first four characters of the report label).
    year_end = datetime.strptime(report_quarter[:4] + '-12-31', '%Y-%m-%d')
    latest_trading_date = str(
        rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))
    shares = rqdatac.get_shares(stock,
                                start_date=latest_trading_date,
                                end_date=latest_trading_date,
                                fields='total')
    return sales.values / shares.values


def get_sales_growth(date, year, market_cap_on_current_day):
    """Return the standardised sales-growth (SGRO) descriptor.

    For each qualifying stock, sales per share of the five most recent annual
    reports is regressed on ``year``; the slope divided by the mean absolute
    sales per share is the raw descriptor.

    :param date: ``'%Y-%m-%d'`` string forwarded to
        ``last_five_annual_report``.
    :param year: numpy array used as regressor via ``year.reshape(-1, 1)``
        (assumed to hold five values, most recent first -- TODO confirm).
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``.
    :return: output of the standardisation call on the ``SGRO`` column.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    # Most recent report first, matching the order of the regression target.
    annual_reports = (annual_report, annual_report_last_year,
                      annual_report_2_year_ago, annual_report_3_year_ago,
                      annual_report_4_year_ago)

    # Only stocks listed more than 1825 days before ``date`` qualify.
    growth_listed_date_threshold = (
        datetime.strptime(date, "%Y-%m-%d") -
        timedelta(days=1825)).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])

    # Compute sales per share for each of the last five annual reports.
    for stock in growth_qualified_stocks:
        query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))
        sales_per_share = pd.Series([
            _annual_sales_per_share(stock, query, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)

        # ``.loc`` writes into ``factor`` itself; the chained form
        # ``factor['SGRO'][stock] = ...`` does not under pandas
        # Copy-on-Write. ``.item()`` replaces the deprecated float() on a
        # one-element array.
        factor.loc[stock, 'SGRO'] = (float(regression.coef_.item()) /
                                     abs(sales_per_share).mean())

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)
    return sale_growth
def customized_factor_return_estimation(date, factor_exposure, stock_list):
    """Estimate daily factor returns within a user-supplied stock pool.

    :param date: ``'%Y-%m-%d'`` string; the latest trading date on or before
        it is the day whose returns are explained.
    :param factor_exposure: DataFrame indexed by order_book_id holding the
        style and industry exposure columns.
    :param stock_list: order_book_ids defining the pool; intersected with the
        stocks that have a market cap.
    :return: result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.
    """
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = rqdatac.get_previous_trading_date(day_after)
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)
    all_stocks = factor_exposure.index.tolist()

    # Daily return of every stock minus the daily risk-free return.
    close = rqdatac.get_price(order_book_ids=all_stocks,
                              start_date=previous_trading_date,
                              end_date=latest_trading_date,
                              fields='close')
    daily_return = close.pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return)**(1 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(id_or_symbols=all_stocks,
                                    factor='a_share_market_val',
                                    start_date=previous_trading_date,
                                    end_date=previous_trading_date)
    cap_is_missing = market_cap.isnull()
    stocks_without_cap = market_cap[cap_is_missing].index.tolist()
    if len(stocks_without_cap) > 0:
        # Rebuild missing market caps as close * total_a shares.
        fallback_price = rqdatac.get_price(stocks_without_cap,
                                           previous_trading_date,
                                           previous_trading_date,
                                           fields='close',
                                           frequency='1d').T
        fallback_shares = rqdatac.get_shares(stocks_without_cap,
                                             previous_trading_date,
                                             previous_trading_date,
                                             fields='total_a').T
        market_cap[cap_is_missing] = (
            fallback_price * fallback_shares)[previous_trading_date]
    root_cap = market_cap.pow(0.5)
    normalized_regression_weight = root_cap / market_cap.pow(0.5).sum()

    # Industry columns differ before and after the 2014 reclassification.
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
            '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
            '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
            '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
            '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used in the industry-return constraint.
    pool_industry_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # An industry whose total market cap is below 100 is treated as not
    # held by the pool at all.
    missing_industry = pool_industry_cap[pool_industry_cap < 100].index
    retained_industry_cap = pool_industry_cap.drop(missing_industry)

    pool_excess_return = daily_excess_return[
        factor_exposure.index][stock_list].values[0]
    pool_exposure = factor_exposure.drop(missing_industry,
                                         axis=1).loc[stock_list]
    pool_weight = normalized_regression_weight[
        factor_exposure.index][stock_list]

    factor_return_series = constrainted_weighted_least_square(
        Y=pool_excess_return,
        X=pool_exposure,
        weight=pool_weight,
        industry_total_market_cap=retained_industry_cap,
        unconstrained_variables=10,
        constrained_variables=len(retained_industry_cap))

    # If the pool holds no stock of a given industry, that industry's factor
    # return is 0.
    return factor_return_series.replace(np.nan, 0)
def get_liquidity(stock_list, date, market_cap_on_current_day):
    """Return the three turnover descriptors and the combined liquidity factor.

    Daily turnover (volume / ``total_a`` shares) is summed over the last
    21, 63 and 252 rows, log-transformed and standardised. The three
    descriptors are then combined with weights 0.35 / 0.35 / 0.3 by
    ``atomic_descriptors_imputation_and_combination`` and standardised again.

    :param stock_list: candidate order_book_ids; stocks whose volume on
        ``date`` is 0 are removed.
    :param date: end date of the window; must support subtraction of a
        ``timedelta`` and be a row label of the volume frame.
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``.
    :return: tuple ``(one_month_share_turnover, three_months_share_turnover,
        twelve_months_share_turnover, processed_liquidity)``.
    """
    calendar = rqdatac.get_trading_dates(
        date - timedelta(days=500), date, country='cn')
    trading_date_252_before = calendar[-252]

    trading_volume = rqdatac.get_price(stock_list, trading_date_252_before,
                                       date, frequency='1d', fields='volume')

    # Stocks with zero volume on ``date`` are excluded from the universe.
    volume_on_date = trading_volume.loc[date]
    zero_volume_stocks = volume_on_date[
        trading_volume.loc[date].values == 0].index.tolist()
    stock_list = list(set(stock_list) - set(zero_volume_stocks))

    outstanding_shares = rqdatac.get_shares(stock_list,
                                            trading_date_252_before, date,
                                            fields='total_a')
    daily_turnover_rate = trading_volume[stock_list] / outstanding_shares

    # A turnover sum of 0 is turned into NaN before the log, so such stocks
    # carry a NaN descriptor into the imputation/combination step.
    turnover_1m = daily_turnover_rate.iloc[-21:].sum().replace(0, np.nan)
    one_month_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_1m), market_cap_on_current_day))

    turnover_3m = daily_turnover_rate.iloc[-63:].sum().replace(0, np.nan)
    three_months_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_3m / 3), market_cap_on_current_day))

    turnover_12m = daily_turnover_rate.iloc[-252:].sum().replace(0, np.nan)
    twelve_months_share_turnover = (
        winsorization_and_market_cap_weighed_standardization(
            np.log(turnover_12m / 12), market_cap_on_current_day))

    atomic_descriptors_df = pd.concat(
        [one_month_share_turnover, three_months_share_turnover,
         twelve_months_share_turnover],
        axis=1)
    atomic_descriptors_df.columns = [
        'one_month_share_turnover', 'three_months_share_turnover',
        'twelve_months_share_turnover'
    ]
    atom_descriptors_weight = pd.Series(
        data=[0.35, 0.35, 0.3],
        index=[
            'one_month_share_turnover', 'three_months_share_turnover',
            'twelve_months_share_turnover'
        ])

    liquidity = atomic_descriptors_imputation_and_combination(
        atomic_descriptors_df, atom_descriptors_weight)
    processed_liquidity = (
        winsorization_and_market_cap_weighed_standardization(
            liquidity, market_cap_on_current_day))

    return (one_month_share_turnover, three_months_share_turnover,
            twelve_months_share_turnover, processed_liquidity)