def get_last_reported_values(financial_indicator, recent_report_type):
    """Fetch each stock's value of an indicator from its own latest report.

    Stocks are grouped by their latest report type; one ``get_financials``
    query is issued per distinct report type and the results are concatenated.

    Args:
        financial_indicator: Field object passed to ``rqdatac.query``.
        recent_report_type: pandas Series whose index holds stock ids and
            whose values are report-type strings (the query is filtered on
            the index labels and keyed by the values).

    Returns:
        The concatenation of the per-report-type results, or an empty Series
        when ``recent_report_type`` contains no entries.
    """
    pieces = []
    # Query once per distinct report type, then merge the results.
    for report_type in recent_report_type.unique().tolist():
        stock_list = recent_report_type[
            recent_report_type == report_type].index.tolist()
        reported = rqdatac.get_financials(
            rqdatac.query(financial_indicator).filter(
                rqdatac.financials.stockcode.in_(stock_list)),
            report_type)
        if len(stock_list) != 1:
            # NOTE(review): presumably a multi-stock query returns a
            # DataFrame whose first row is the requested quarter -- confirm
            # against the rqdatac version in use.
            reported = reported.iloc[0]
        pieces.append(reported)

    if not pieces:
        # pd.concat raises on an empty list; keep returning an empty Series.
        return pd.Series(dtype=object)
    # Series.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat(pieces)
def _eps_with_fallback(primary_query, fallback_query, report_quarter):
    """Return EPS for one report quarter, preferring the primary query.

    The primary query is issued exactly once; the fallback query is issued
    only when the primary result contains missing values.
    """
    primary = rqdatac.get_financials(primary_query, report_quarter, '1q')
    if primary.isnull().sum() == 0:
        return primary
    return rqdatac.get_financials(fallback_query, report_quarter, '1q')


def get_earnings_growth(date, year, market_cap_on_current_day):
    """Compute the EGRO (earnings growth) factor.

    For every stock listed before ``date`` minus 1825 days, EPS from the last
    five annual reports is regressed on ``year``; the slope divided by the
    absolute mean EPS is the raw factor value, which is then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Date string in ``%Y-%m-%d`` format.
        year: Array with a ``reshape`` method, used as the regressor.
            NOTE(review): it must be aligned with the newest-to-oldest order
            of the EPS values below -- confirm against the caller.
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the raw EGRO column.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    # Newest first, matching the order the EPS values are regressed in.
    annual_reports = (
        annual_report,
        annual_report_last_year,
        annual_report_2_year_ago,
        annual_report_3_year_ago,
        annual_report_4_year_ago,
    )

    growth_listed_date_threshold = (
        datetime.strptime(date, "%Y-%m-%d") - timedelta(days=1825)
    ).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['EGRO'])
    for stock in growth_qualified_stocks:
        # Some companies revise a report several times after publication and
        # the revised financial-indicator table can lose its EPS value (e.g.
        # '601519.XSHG', 2014 annual report) while the income statement still
        # carries basic EPS. So financial_indicator is the preferred table
        # and income_statement is the fallback.
        query_f = rqdatac.query(
            rqdatac.financials.financial_indicator.earnings_per_share).filter(
                rqdatac.financials.stockcode.in_([stock]))
        query_i = rqdatac.query(
            rqdatac.financials.income_statement.basic_earnings_per_share
        ).filter(rqdatac.financials.stockcode.in_([stock]))

        eps = pd.Series([
            _eps_with_fallback(query_f, query_i, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), eps)
        # .item() extracts the single slope without relying on implicit
        # array-to-scalar conversion; .loc avoids chained assignment, which
        # does not write through under pandas Copy-on-Write.
        factor.loc[stock, 'EGRO'] = (
            float(regression.coef_.item()) / abs(eps.mean()))

    earning_growth = winsorization_and_market_cap_weighed_standardization(
        factor['EGRO'], market_cap_on_current_day)
    return earning_growth
def get_recent_financial_report(date):
    """Return the latest report type and the latest annual report type.

    Args:
        date: Date string in ``%Y-%m-%d`` format; also used as the column
            label selected from the transposed results.

    Returns:
        A tuple ``(recent, annual)``: the ``report_quarter`` data returned by
        ``rqdatac.get_fundamentals`` for ``date``, and a copy of it in which
        every entry different from last year's ``q4`` has been overwritten
        with an annual-report label.
    """
    previous_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    last_year_annual = str(previous_year) + 'q4'
    year_before_annual = str(previous_year - 1) + 'q4'

    # Latest report type, e.g. '2016q3' (first three quarters, cumulative)
    # or '2016q4' (annual report).
    net_profit_query = rqdatac.query(
        rqdatac.fundamentals.income_statement.net_profit)
    fundamentals = rqdatac.get_fundamentals(
        net_profit_query, entry_date=date, report_quarter=True)
    recent_report_type = fundamentals['report_quarter']

    # Work on a copy so the overwrite below leaves recent_report_type intact.
    annual_report_type = recent_report_type.copy()

    # If a company has not published last year's annual report yet, the
    # annual report of the year before counts as its latest one.
    # NOTE(review): only the first entry's year is inspected to choose the
    # replacement for all entries -- confirm this is intended.
    first_reported_year = recent_report_type.T.iloc[0].values[0][:4]
    if first_reported_year == str(previous_year):
        replacement = year_before_annual
    else:
        replacement = last_year_annual
    annual_report_type[annual_report_type != last_year_annual] = replacement

    # Both objects are DataFrames; select the `date` column to return Series.
    return recent_report_type.T[date], annual_report_type.T[date]
def get_ttm_sum(financial_indicator, recent_report_type):
    """Compute a trailing-twelve-month value of an indicator per stock.

    TTM = latest annual report value + latest report value - value of the
    same report one year earlier. When the latest report is itself an annual
    report, its value is used directly.

    Args:
        financial_indicator: Field object passed to ``rqdatac.query``.
        recent_report_type: pandas Series of report-type strings such as
            ``'2016q3'``, one per stock.

    Returns:
        pandas Series keyed by the row labels of the fetched financial data.

    Raises:
        Exception: If a report-type string does not end in q1, q2, q3 or q4.
    """

    def _get_ttm_date(quarter):
        """List the three report quarters needed for one stock's TTM."""
        suffix = quarter[-2:]
        # Latest report is the annual report: only that report is needed.
        if suffix == "q4":
            return [np.nan, np.nan, quarter]
        # Latest report is a Q1/Q2/Q3 report.
        if suffix in ("q3", "q2", "q1"):
            prior_year = str(int(quarter[:4]) - 1)
            return [prior_year + suffix, prior_year + "q4", quarter]
        print(quarter)
        raise Exception("what?")

    def _calc_ttm(data):
        """Combine the selected report values of one stock into a TTM value."""
        data = data.dropna().sort_index()
        available = len(data)
        if available == 0:
            return np.nan
        if available == 1:
            return data.iloc[0]
        # Two most recent reports minus the oldest one.
        return data.iloc[-2:].sum() - data.iloc[0]

    # Most recent quarter across all stocks.
    max_quarter = max(recent_report_type)

    # Eight reporting periods of data for all stocks, ending at max_quarter.
    financial_data = rqdatac.get_financials(
        rqdatac.query(financial_indicator),
        quarter=max_quarter,
        interval="8q",
        country='cn').T

    # The three report quarters each stock needs for its TTM value.
    ttm_quarters = recent_report_type.apply(_get_ttm_date).to_dict()
    effective_quarter = pd.DataFrame(ttm_quarters).T
    effective_quarter = effective_quarter.unstack()
    effective_quarter.index = effective_quarter.index.droplevel(0)

    # Turn the (stock, quarter) pairs into a boolean mask shaped like
    # financial_data: True where that quarter is needed for that stock.
    merged_data = pd.DataFrame(effective_quarter)
    merged_data['mask'] = 1
    long_form = merged_data.dropna().reset_index()
    pivoted = long_form.pivot(index='index', columns=0, values='mask')
    aligned = pivoted.reindex(columns=financial_data.columns)
    previous_quarters_mask = aligned.astype(float).replace(
        np.nan, 0).astype(bool)
    latest_data = financial_data.where(previous_quarters_mask)

    financial_values = {
        label: _calc_ttm(row) for label, row in latest_data.iterrows()
    }
    return pd.Series(financial_values)
def query(*entities):
    """Build a query by forwarding all positional arguments to ``rqdatac.query``."""
    built_query = rqdatac.query(*entities)
    return built_query
def factor_return_estimation(date, factor_exposure, industry_factors):
    """Estimate factor returns for one day by constrained weighted least squares.

    Daily excess stock returns are regressed on ``factor_exposure`` with
    weights proportional to the square root of market cap. The regression
    itself is delegated to ``constrainted_weighted_least_square``.

    Args:
        date: Date string in ``%Y-%m-%d`` format.
        factor_exposure: DataFrame indexed by stock id, containing at least
            the columns named in ``industry_factors``.
        industry_factors: Column labels of the industry factors.

    Returns:
        The result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.

    Raises:
        ValueError: If 30 or more market-cap values are missing from both
            the primary and the fallback data source.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)
    stock_ids = factor_exposure.index.tolist()

    # Daily stock return: close-to-close change between the two trading days.
    daily_return = rqdatac.get_price(
        order_book_ids=stock_ids,
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    # Daily risk-free return, de-compounded from the 3M yield over 252 days.
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    daily_risk_free_return = (
        (1 + compounded_risk_free_return) ** (1.0 / 252)) - 1
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=stock_ids,
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)
    if market_cap.isnull().sum() >= 30:
        # Too many gaps in the factor data: fall back to fundamentals.
        # NOTE(review): major_xs is a Panel method that newer pandas no
        # longer provides -- confirm the rqdatac/pandas versions in use.
        market_cap = rqdatac.get_fundamentals(
            rqdatac.query(
                rqdatac.fundamentals.eod_derivative_indicator.
                a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)[
                'a_share_market_val'].loc[factor_exposure.index]
        if market_cap.isnull().sum() >= 30:
            raise ValueError('市值出现大量缺失')
    # Drop the remaining gaps on BOTH paths. Previously the fallback path
    # kept its NaNs, which then propagated into the weights and the
    # industry totals below.
    market_cap = market_cap.dropna()

    sqrt_market_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_market_cap / sqrt_market_cap.sum()

    # Total market cap per industry, used by the industry-return constraint.
    exposure = factor_exposure.loc[market_cap.index]
    industry_total_market_cap = market_cap.dot(exposure[industry_factors])

    # The 10 style factors are unconstrained; the industry factors (GICS, 32
    # industries per the original note) are constrained.
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=exposure,
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))
    return factor_return_series.replace(np.nan, 0)
def recent_five_annual_values(financial_indicator, date, recent_report_type):
    """Collect an indicator's values from each stock's last five annual reports.

    Stocks whose latest report is last year's annual report use the five
    annual reports ending with last year's; all other stocks use the five
    annual reports ending one year earlier.

    Args:
        financial_indicator: Field object passed to ``rqdatac.query``.
        date: Date string in ``%Y-%m-%d`` format.
        recent_report_type: pandas Series of report-type strings indexed by
            stock id.

    Returns:
        DataFrame with columns ``first`` .. ``fifth`` (newest to oldest).
        Rows for stocks with a published annual report come first, followed
        by the remaining stocks; each group keeps the order of
        ``recent_report_type``.
    """
    previous_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    latest_annual_quarter = str(previous_year) + 'q4'

    # Split on a boolean mask instead of a set difference so that the row
    # order is deterministic and does not depend on hash randomization.
    is_published = recent_report_type == latest_annual_quarter
    annual_report_published_stocks = recent_report_type[
        is_published].index.tolist()
    annual_report_not_published_stocks = recent_report_type[
        ~is_published].index.tolist()

    # Last five annual reports when last year's report has been published...
    annual_report_published_list = [
        str(previous_year - offset) + 'q4' for offset in range(5)
    ]
    # ...and when it has not: shift the window back by one year.
    annual_report_not_published_list = [
        str(previous_year - offset) + 'q4' for offset in range(1, 6)
    ]

    # 25 reporting periods ending at last year's annual report, for all
    # stocks; transposed so that quarters become columns.
    recent_five_reports = rqdatac.get_financials(
        rqdatac.query(financial_indicator), latest_annual_quarter, '25q').T

    annual_report_published_values = recent_five_reports[
        annual_report_published_list].loc[annual_report_published_stocks]
    annual_report_not_published_values = recent_five_reports[
        annual_report_not_published_list].loc[
            annual_report_not_published_stocks]

    # Give both frames the same column labels so they can be stacked.
    ordinal_labels = ['first', 'second', 'third', 'fourth', 'fifth']
    annual_report_published_values.columns = list(ordinal_labels)
    annual_report_not_published_values.columns = list(ordinal_labels)

    recent_five_reports_values = pd.concat(
        [annual_report_published_values, annual_report_not_published_values],
        axis=0)
    return recent_five_reports_values
def _sales_per_share_for_report(stock, revenue_query, report_quarter):
    """Return revenue of one annual report divided by year-end total shares.

    The share count is taken on the last trading day on or before 31 December
    of the report's year (the previous trading date of 1 January).
    """
    sales = rqdatac.get_financials(revenue_query, report_quarter, '1q')
    year_end_trading_date = str(
        rqdatac.get_previous_trading_date(
            datetime.strptime(report_quarter[:4] + '-12-31', '%Y-%m-%d') +
            timedelta(days=1)))
    shares = rqdatac.get_shares(
        stock,
        start_date=year_end_trading_date,
        end_date=year_end_trading_date,
        fields='total')
    return sales.values / shares.values


def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the SGRO (sales growth) factor.

    For every stock listed before ``date`` minus 1825 days, sales per share
    from the last five annual reports is regressed on ``year``; the slope
    divided by the mean absolute sales per share is the raw factor value,
    which is then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Date string in ``%Y-%m-%d`` format.
        year: Array with a ``reshape`` method, used as the regressor.
            NOTE(review): it must be aligned with the newest-to-oldest order
            of the values below -- confirm against the caller.
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the raw SGRO column.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    # Newest first, matching the order the values are regressed in.
    annual_reports = (
        annual_report,
        annual_report_last_year,
        annual_report_2_year_ago,
        annual_report_3_year_ago,
        annual_report_4_year_ago,
    )

    growth_listed_date_threshold = (
        datetime.strptime(date, "%Y-%m-%d") - timedelta(days=1825)
    ).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])
    # Sales per share for each of the past five years, from annual reports.
    for stock in growth_qualified_stocks:
        # Named revenue_query so it does not shadow the module-level query().
        revenue_query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _sales_per_share_for_report(stock, revenue_query, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)
        # .item() extracts the single slope without relying on implicit
        # array-to-scalar conversion; .loc avoids chained assignment, which
        # does not write through under pandas Copy-on-Write.
        factor.loc[stock, 'SGRO'] = (
            float(regression.coef_.item()) / abs(sales_per_share).mean())

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)
    return sale_growth