def get_recent_financial_report(date):
    """Return the latest report label and the latest annual-report label.

    Report labels are strings such as ``'2016q3'`` (first three quarters,
    cumulative) or ``'2016q4'`` (annual report).

    :param date: query date formatted as ``'%Y-%m-%d'``; it is also passed as
        ``entry_date`` to the fundamentals query and used to select the
        column of the transposed results.
    :return: tuple ``(recent_report_type, annual_report_type)``; both are
        DataFrames internally and are returned as Series selected at ``date``.
    :raises ValueError: if ``date`` does not match ``'%Y-%m-%d'``.
    """
    last_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    last_year_annual = str(last_year) + 'q4'

    # Latest report type per stock, e.g. '2016q3' or '2016q4'.
    net_profit_query = rqdatac.query(
        rqdatac.fundamentals.income_statement.net_profit)
    recent_report_type = rqdatac.get_fundamentals(
        net_profit_query, entry_date=date,
        report_quarter=True)['report_quarter']

    # Work on a copy so the "recent" labels stay untouched.
    annual_report_type = recent_report_type.copy()

    # If a company has not published last year's annual report yet, fall back
    # to the annual report of the year before. The decision is driven by the
    # report year of the first entry only.
    first_label = recent_report_type.T.iloc[0].values[0]
    if first_label[:4] == str(last_year):
        replacement = str(last_year - 1) + 'q4'
    else:
        replacement = last_year_annual
    not_last_year_annual = annual_report_type != last_year_annual
    annual_report_type[not_last_year_annual] = replacement

    # Both objects are DataFrames; select the ``date`` column of the
    # transposed frames so Series are returned.
    return recent_report_type.T[date], annual_report_type.T[date]
def get_fundamentals(self, codes, start_date=None, years=10, type='y'):
    """Fetch quarterly diluted adjusted ROE for ``codes``, one table per code.

    :param codes: iterable of stock codes used both to filter the query and
        to split the result.
    :param start_date: date passed to ``rq.get_fundamentals``; defaults to
        ``public.getDate()`` when ``None``.
    :param years: number of years of history; converted to ``years * 4``
        quarters for the ``interval`` argument.
    :param type: unused; kept so existing callers keep working.
    :return: dict mapping each code found in the result to
        ``res.minor_xs(code)``. Codes missing from the result are skipped,
        and an empty dict is returned when the query yields ``None``.
    """
    # NOTE: an alternative query over pe_ratio, total_assets,
    # total_liabilities and total_equity_and_liabilities was sketched here
    # previously; only the ROE indicator is actually queried.
    q = query(
        fds.financial_indicator.adjusted_return_on_equity_diluted).filter(
            fds.stockcode.in_(codes))
    if start_date is None:
        start_date = public.getDate()
    print('-----', start_date)

    # History length expressed in quarters, e.g. 10 years -> '40q'.
    interval = str(years * 4) + 'q'
    res = rq.get_fundamentals(q, start_date, interval=interval,
                              report_quarter=True)
    if res is None:
        # Nothing came back; previously every lookup failed silently and the
        # function returned an empty dict, so keep that outcome explicit.
        return {}

    per_code = {}
    for code in codes:
        try:
            per_code[code] = res.minor_xs(code)
        except KeyError:
            # This code is absent from the result; skip it (best effort).
            continue
    return per_code
def get_fundamentals(query, entry_date=None, interval='1d', report_quarter=False, expect_df=False, **kwargs):
    """Deprecated wrapper around ``rqdatac.get_fundamentals``.

    Logs a deprecation warning, then queries no later than the day before the
    environment's current calendar date.

    :param query: query object forwarded to ``rqdatac.get_fundamentals``.
    :param entry_date: requested date; when falsy, the day before the current
        calendar date is used. ``date`` is accepted as an alternative keyword
        when ``entry_date`` is ``None``.
    :param interval: forwarded unchanged.
    :param report_quarter: forwarded unchanged.
    :param expect_df: forwarded unchanged; when truthy the raw result is
        returned as-is.
    :return: an empty ``pd.DataFrame`` when the data source returns ``None``;
        the raw result when ``expect_df`` is truthy or the result has more
        than one entry on its major axis; otherwise the transposed
        cross-section of the single major-axis entry.
    :raises RQInvalidArgument: on unknown keyword arguments, or when
        ``entry_date`` is later than the day before the current calendar date.
    """
    user_log.warn('get_fundamentals is deprecated, use get_pit_financials_ex instead')
    today = Environment.get_instance().calendar_dt.date()

    if entry_date is None and 'date' in kwargs:
        entry_date = kwargs.pop('date')
    if kwargs:
        raise RQInvalidArgument('unknown arguments: {}'.format(kwargs))

    latest_query_day = today - datetime.timedelta(days=1)
    query_date = latest_query_day
    if entry_date:
        entry_date = to_date(entry_date)
        if not (entry_date <= latest_query_day):
            raise RQInvalidArgument(
                _('in get_fundamentals entry_date {} is no earlier than test date {}').format(entry_date, today))
        query_date = entry_date

    result = rqdatac.get_fundamentals(
        query, query_date, interval,
        report_quarter=report_quarter, expect_df=expect_df)
    if result is None:
        return pd.DataFrame()
    if expect_df or len(result.major_axis) != 1:
        return result

    # Frames returned in research and in backtests have opposite
    # orientations, hence the transpose of the single-date cross-section.
    only_entry = result.major_axis[0]
    return result.major_xs(only_entry).T
def get_index_component_industry_and_marketcap(matching_index, matching_date):
    """
    Build a per-component table of industry, market cap and cap weight.

    :param matching_index: index identifier passed to
        ``rqdatac.index_components``
    :param matching_date: date used for the component list, the industry
        lookup and the market-cap query
    :return: DataFrame indexed by component with an ``industry`` column, a
        ``market_cap`` column and a ``percent`` column (market cap divided by
        the total market cap), sorted by industry and then market cap
    """
    # Index components and their Shenwan industry.
    components = rqdatac.index_components(matching_index, matching_date)
    matching_index_df = pd.DataFrame(index=components)
    matching_index_df['industry'] = [
        rqdatac.shenwan_instrument_industry(s, matching_date)
        for s in matching_index_df.index
    ]

    # Market cap of every component on the matching date.
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(
                matching_index_df.index)),
        entry_date=matching_date)
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()

    # Put industry and market cap side by side.
    matching_index_cap_df = pd.concat([matching_index_df, market_cap_], axis=1)

    # Rename the second column by assigning a fresh list of labels. Writing
    # into ``columns.values`` mutates the backing array of an immutable Index
    # and is not a supported way to rename a column.
    column_names = list(matching_index_cap_df.columns)
    column_names[1] = 'market_cap'
    matching_index_cap_df.columns = column_names

    # Weight of each component by market cap.
    total_market_cap = sum(matching_index_cap_df.market_cap)
    matching_index_cap_df['percent'] = (
        matching_index_cap_df.market_cap / total_market_cap)

    return matching_index_cap_df.sort_values(['industry', 'market_cap'])
def get_stock_test_suite(start_t='2013-01-01', end_t='2017-07-05',
                         market_cap_date='20140101'):
    """
    Build stock lists for testing from stocks alive between two dates.

    A stock qualifies when it was listed on or before ``start_t``, is not
    delisted before or on ``end_t``, and is never flagged ST between the two
    dates.

    :param start_t: start date as ``'YYYY-MM-DD'`` (compared as a string with
        the instruments' listed dates)
    :param end_t: end date as ``'YYYY-MM-DD'``
    :param market_cap_date: entry date of the market-cap query used for
        ranking; defaults to the previously hard-coded ``'20140101'``
    :return: dict keyed 0-99:
        0 is the 100 largest stocks by market cap,
        1 is the next 100 (ranks 101-200),
        2 is ranks -200 to -101 (second-smallest hundred),
        3 is the 100 smallest,
        4 is the 50 largest plus the 50 smallest,
        5-99 are random picks, one stock for every combination of Shenwan
        industry and market-cap category (1, 2, 3) within that industry
    :raises ValueError: if some industry has fewer than three stocks, so no
        random pick can be drawn from each of its categories
    """
    # All common stocks.
    all_stocks0 = list(all_instruments(type='CS').order_book_id)

    # Keep stocks that are alive during start_t ~ end_t.
    all_stocks1 = [
        i.order_book_id for i in instruments(all_stocks0)
        if i.listed_date <= start_t and (
            i.de_listed_date == '0000-00-00' or end_t < i.de_listed_date)
    ]

    # Rule out stocks flagged ST on any day of the period.
    st_days = is_st_stock(all_stocks1, start_t, end_t).sum(axis=0)
    all_stocks2 = [i for i in all_stocks1 if st_days.loc[i] == 0]

    # Market cap of the remaining stocks.
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(all_stocks2)),
        entry_date=market_cap_date)
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()
    stock_df = pd.DataFrame(index=all_stocks2)
    by_cap = pd.concat([stock_df, market_cap_], axis=1)
    by_cap.columns = ['market_cap']
    # Descending sort by market value.
    by_cap = by_cap.sort_values(by='market_cap', ascending=False)

    # Tag with the Shenwan category (don't add date to
    # shenwan_instrument_industry).
    by_cap["industry"] = [shenwan_instrument_industry(s) for s in by_cap.index]
    shenwan_name = by_cap.industry.unique()

    # by_cap is sorted by market cap, so slices are size buckets.
    stock_test_suite = {
        0: list(by_cap.index[:100]),
        1: list(by_cap.index[100:200]),
        2: list(by_cap.index[-200:-100]),
        3: list(by_cap.index[-100:]),
        4: list(by_cap.index[:50]) + list(by_cap.index[-50:]),
    }

    # Sorted by industry first, then by market cap within the industry, both
    # descending.
    by_industry = by_cap.sort_values(by=['industry', 'market_cap'],
                                     ascending=False)

    # Within each industry tag rows with 1, 2, 3 to split them into three
    # categories; the remainder of the division goes to category 3.
    for industry in shenwan_name:
        in_industry = by_industry['industry'] == industry
        count = int(in_industry.sum())
        third, remainder = divmod(count, 3)
        by_industry.loc[in_industry, 'category'] = (
            [1] * third + [2] * third + [3] * (third + remainder))

    # Size of the smallest category over all industries: any index below it
    # is valid in every (industry, category) group. Integer division keeps
    # the bound an int for randint().
    industry_sizes = by_industry.groupby(by='industry').size()
    safe_num = int(industry_sizes.min()) // 3
    if safe_num < 1:
        raise ValueError(
            'every industry needs at least 3 stocks to draw random suites')

    # The members of each (industry, category) group do not depend on the
    # suite being drawn, so compute them once instead of once per suite.
    groups = [
        by_industry.index[(by_industry.industry == industry)
                          & (by_industry.category == category)]
        for industry in shenwan_name for category in (1, 2, 3)
    ]
    for key in range(5, 100):
        stock_test_suite[key] = [
            members[np.random.randint(safe_num)] for members in groups
        ]
    return stock_test_suite
def factor_return_estimation(date, factor_exposure, industry_factors):
    """Estimate factor returns for ``date`` by constrained weighted least squares.

    :param date: date string formatted as ``"%Y-%m-%d"``.
    :param factor_exposure: exposure table whose index lists the order book
        ids and whose columns include ``industry_factors``.
    :param industry_factors: column labels of the industry factors in
        ``factor_exposure``.
    :return: result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.
    :raises ValueError: if 30 or more market-cap values are missing from both
        the factor source and the fundamentals fallback.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Daily stock return in excess of the daily risk-free return.
    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    # De-compound the 3M yield over 252 periods to get a daily rate.
    daily_risk_free_return = (1 + compounded_risk_free_return) ** (1 / 252) - 1
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)
    if market_cap.isnull().sum() >= 30:
        # Too many gaps in the factor data: fall back to fundamentals.
        market_cap_df = rqdatac.get_fundamentals(
            rqdatac.query(
                rqdatac.fundamentals.eod_derivative_indicator.a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)[
                'a_share_market_val'].loc[factor_exposure.index]
        if market_cap_df.isnull().sum() >= 30:
            raise ValueError('市值出现大量缺失')
        # Drop the remaining gaps here as well, exactly like the primary
        # branch; otherwise NaN leaks into the weights and industry totals.
        market_cap = market_cap_df.dropna()
    else:
        market_cap = market_cap.dropna()

    sqrt_market_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_market_cap / sqrt_market_cap.sum()

    # Total market cap per industry, used in the industry-return constraint.
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    # The 10 style factors are unconstrained; the industry factors (32 GICS
    # industries per the original note) are constrained.
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))
    return factor_return_series.replace(np.nan, 0)