Exemple #1
0
def get_recent_financial_report(date):
    """Return the latest report label and the latest annual report label.

    :param date: query date as a ``'%Y-%m-%d'`` string; it is also used to
        select the result column after transposing.
    :return: tuple ``(recent, annual)`` taken from the transposed
        ``report_quarter`` frames at ``date``. Per the original notes the
        labels look like ``'2016q3'`` (first three quarters, cumulative) or
        ``'2016q4'`` (annual report).
    """
    last_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    last_year_annual = str(last_year) + 'q4'

    # Latest published report label for every stock as of `date`.
    net_profit_query = rqdatac.query(
        rqdatac.fundamentals.income_statement.net_profit)
    latest_reports = rqdatac.get_fundamentals(
        net_profit_query, entry_date=date,
        report_quarter=True)['report_quarter']

    # Independent copy so the masked assignment below leaves
    # `latest_reports` untouched.
    annual_reports = latest_reports.copy()

    # The year of the very first label decides the fallback: when it is still
    # last year's, anything that is not last year's annual report falls back
    # to the annual report of the year before; otherwise it falls back to
    # last year's annual report.
    first_label_year = latest_reports.T.iloc[0].values[0][:4]
    if first_label_year == str(last_year):
        fallback_label = str(last_year - 1) + 'q4'
    else:
        fallback_label = last_year_annual

    annual_reports[annual_reports != last_year_annual] = fallback_label

    # Both objects are DataFrames; selecting `date` after the transpose
    # yields Series.
    return latest_reports.T[date], annual_reports.T[date]
Exemple #2
0
    def get_fundamentals(self, codes, start_date=None, years=10, type='y'):
        """Fetch quarterly ``adjusted_return_on_equity_diluted`` per code.

        :param codes: iterable of stock codes used both to filter the query
            and as keys of the returned dict.
        :param start_date: date passed to ``rq.get_fundamentals``; defaults
            to ``public.getDate()`` when ``None``.
        :param years: number of years to look back; converted to a quarter
            interval of ``years * 4`` quarters (e.g. ``'40q'``).
        :param type: unused by this method; kept for interface
            compatibility.
        :return: dict mapping each code to ``res.minor_xs(code)``; codes
            whose cross-section cannot be extracted are left out.

        A wider query that was sketched for this method::

            q = query(fds.eod_derivative_indicator.pe_ratio,
                      fds.balance_sheet.total_assets,
                      fds.balance_sheet.total_liabilities,
                      fds.balance_sheet.total_equity_and_liabilities
                      ).filter(fds.stockcode.in_(codes))
        """
        q = query(
            fds.financial_indicator.adjusted_return_on_equity_diluted).filter(
                fds.stockcode.in_(codes))

        if start_date is None:
            start_date = public.getDate()

        print('-----', start_date)

        interval = str(years * 4) + 'q'
        res = rq.get_fundamentals(q,
                                  start_date,
                                  interval=interval,
                                  report_quarter=True)

        d = {}
        for c in codes:
            try:
                d[c] = res.minor_xs(c)
            except Exception:
                # Best effort: skip codes without data, but no longer swallow
                # KeyboardInterrupt / SystemExit as the bare ``except`` did.
                continue
        return d
Exemple #3
0
def get_fundamentals(query, entry_date=None, interval='1d', report_quarter=False, expect_df=False, **kwargs):
    """Deprecated wrapper around ``rqdatac.get_fundamentals``.

    Logs a deprecation warning, then queries fundamentals as of
    ``entry_date`` or, when none is given, as of the day before the
    environment's current calendar date.

    :param query: query object forwarded to ``rqdatac.get_fundamentals``.
    :param entry_date: optional query date; the keyword ``date`` is accepted
        as an alias when ``entry_date`` is ``None``.
    :param interval: forwarded unchanged.
    :param report_quarter: forwarded unchanged.
    :param expect_df: forwarded unchanged; when truthy the raw result is
        returned as is.
    :raises RQInvalidArgument: on unknown keyword arguments, or when
        ``entry_date`` is later than the day before the current calendar
        date.
    :return: an empty ``DataFrame`` when rqdatac returns ``None``; the raw
        result when ``expect_df`` is truthy; the transposed single-date
        cross-section when the result has exactly one major-axis entry;
        otherwise the raw result.
    """
    user_log.warn('get_fundamentals is deprecated, use get_pit_financials_ex instead')

    today = Environment.get_instance().calendar_dt.date()

    if entry_date is None and 'date' in kwargs:
        entry_date = kwargs.pop('date')
    if kwargs:
        raise RQInvalidArgument('unknown arguments: {}'.format(kwargs))

    # Data is only available up to the day before the current calendar date.
    yesterday = today - datetime.timedelta(days=1)

    query_date = yesterday
    if entry_date:
        entry_date = to_date(entry_date)
        if entry_date > yesterday:
            raise RQInvalidArgument(
                _('in get_fundamentals entry_date {} is no earlier than test date {}').format(entry_date, today))
        query_date = entry_date

    data = rqdatac.get_fundamentals(query, query_date, interval, report_quarter=report_quarter, expect_df=expect_df)
    if data is None:
        return pd.DataFrame()
    if expect_df:
        return data
    if len(data.major_axis) != 1:
        return data

    # The frame orientation in research and in backtest is opposite, hence
    # the transpose of the single-date cross-section.
    return data.major_xs(data.major_axis[0]).T
Exemple #4
0
def get_index_component_industry_and_marketcap(matching_index, matching_date):
    """
    Build a per-component table of industry, market cap and cap weight.

    :param matching_index: index identifier passed to
        ``rqdatac.index_components``.
    :param matching_date: date used for the component list, the Shenwan
        industry lookup and the market-cap query.
    :return: DataFrame indexed by component with the columns ``industry``,
        ``market_cap`` and ``percent`` (the component's share of the summed
        market cap), sorted by industry and then by market cap.
    """

    # Index components and their Shenwan industry.
    components = rqdatac.index_components(matching_index, matching_date)
    matching_index_df = pd.DataFrame(index=components)
    matching_index_df['industry'] = [
        rqdatac.shenwan_instrument_industry(s, matching_date)
        for s in matching_index_df.index
    ]

    # Market cap of every component on the matching date.
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(
                matching_index_df.index)),
        entry_date=matching_date)
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()

    # Paste both pieces together as one frame.
    matching_index_cap_df = pd.concat([matching_index_df, market_cap_], axis=1)

    # Rename the second column through the ``columns`` setter. Writing into
    # ``columns.values`` mutates the Index's backing array in place, which
    # pandas does not support and which can leave cached lookups stale.
    column_labels = list(matching_index_cap_df.columns)
    column_labels[1] = 'market_cap'
    matching_index_cap_df.columns = column_labels

    # Each component's share of the total market cap.
    total_market_cap = sum(matching_index_cap_df.market_cap)
    matching_index_cap_df[
        'percent'] = matching_index_cap_df.market_cap / total_market_cap

    # Sort by industry and market cap.
    return matching_index_cap_df.sort_values(['industry', 'market_cap'])
def get_stock_test_suite(start_t='2013-01-01', end_t='2017-07-05'):
    """
    Build stock test suites from stocks alive between two dates.

    Only stocks listed on or before ``start_t``, not delisted by ``end_t``
    and never flagged ST between the two dates are used. Market cap is taken
    as of the fixed entry date ``'20140101'``.

    :param start_t: start date string, compared against ``listed_date``.
    :param end_t: end date string, compared against ``de_listed_date``.
    :return: dict keyed 0-99:
        0 is the biggest 100 stocks by market cap
        1 is the next 100 (101 ~ 200)
        2 is the smallest -200 ~ -101 stocks
        3 is the smallest -100 ~ -1 stocks
        4 is the biggest 50 + smallest 50
        5 ~ 99 are random combos with one stock per (Shenwan industry,
        market-cap tercile) pair
    :raises ValueError: when the smallest industry has fewer than 3 stocks,
        so no valid random position exists for every tercile.
    """
    # All common stocks.
    all_stocks0 = list(all_instruments(type='CS').order_book_id)

    # Keep stocks that are alive during start_t ~ end_t.
    all_stocks1 = [
        i.order_book_id for i in instruments(all_stocks0)
        if i.listed_date <= start_t and (
            i.de_listed_date == '0000-00-00' or end_t < i.de_listed_date)
    ]

    # Rule out stocks that were ST at any point in the window.
    st_counts = is_st_stock(all_stocks1, start_t, end_t).sum(axis=0)
    all_stocks2 = [i for i in all_stocks1 if st_counts.loc[i] == 0]

    # Market cap of the remaining stocks.
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(all_stocks2)),
        entry_date='20140101')
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()
    stock_df = pd.DataFrame(index=all_stocks2)
    temp1 = pd.concat([stock_df, market_cap_], axis=1)
    temp1.columns = ['market_cap']
    # Descending sort by market value.
    temp2 = temp1.sort_values(by='market_cap', ascending=False)

    # Tag with the Shenwan category (deliberately without a date argument).
    temp2["industry"] = [shenwan_instrument_industry(s) for s in temp2.index]
    shenwan_name = temp2.industry.unique()

    stock_test_suite = {}

    # temp2 is sorted by market cap, so slices are size buckets.
    stock_test_suite[0] = list(temp2.index[:100])
    stock_test_suite[1] = list(temp2.index[100:200])
    stock_test_suite[2] = list(temp2.index[-200:-100])
    stock_test_suite[3] = list(temp2.index[-100:])
    stock_test_suite[4] = list(temp2.index[:50]) + list(temp2.index[-50:])

    # Sorted by industry first, then by market cap within the industry, both
    # descending.
    temp3 = temp2.sort_values(by=['industry', 'market_cap'], ascending=False)

    # Within each industry tag rows 1/2/3 to split them into three groups;
    # leftover rows join group 3.
    for name in shenwan_name:
        in_industry = temp3['industry'] == name
        per_group, leftover = divmod(int(in_industry.sum()), 3)
        cate_temp = list(np.repeat([1, 2, 3], per_group)) + [3] * leftover
        temp3.loc[in_industry, 'category'] = cate_temp

    # Every (industry, category) bucket holds at least this many stocks, so
    # it is a safe exclusive upper bound for the random position. Integer
    # division keeps the bound an int, as np.random.randint expects.
    sum_info = temp3.groupby(by='industry').size()
    safe_num = int(min(sum_info)) // 3
    if safe_num < 1:
        raise ValueError(
            'smallest industry has fewer than 3 stocks; '
            'cannot draw one stock per market-cap tercile')

    # The buckets do not change between suites, so compute them once instead
    # of re-filtering the frame for each of the 95 random suites.
    buckets = [
        temp3[(temp3.industry == a) & (temp3.category == b)].index
        for a in shenwan_name
        for b in (1, 2, 3)
    ]

    for i in range(5, 100):
        stock_test_suite[i] = [
            bucket[np.random.randint(safe_num)] for bucket in buckets
        ]

    return stock_test_suite
def factor_return_estimation(date, factor_exposure, industry_factors):
    """Estimate one day's factor returns by constrained weighted least squares.

    :param date: date as a ``'%Y-%m-%d'`` string.
    :param factor_exposure: exposure table whose index holds the order book
        ids and whose columns include ``industry_factors`` (presumably a
        DataFrame of stocks x factors; confirm against the caller).
    :param industry_factors: column labels of the industry factors in
        ``factor_exposure``.
    :raises ValueError: when 30 or more market caps are missing from both
        the factor source and the fundamentals fallback.
    :return: result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    order_book_ids = factor_exposure.index.tolist()

    # Daily excess return: close-to-close return minus the daily risk-free
    # return.
    daily_return = rqdatac.get_price(
        order_book_ids=order_book_ids,
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    # De-compound the 3M yield to a daily rate over 252 periods.
    daily_risk_free_return = (1 + compounded_risk_free_return)**(1 / 252) - 1

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is used as the weighted-least-squares
    # weight.
    market_cap = rqdatac.get_factor(
        id_or_symbols=order_book_ids,
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    if market_cap.isnull().sum() >= 30:
        # Too many gaps in the factor source: fall back to fundamentals.
        market_cap = rqdatac.get_fundamentals(
            rqdatac.query(rqdatac.fundamentals.eod_derivative_indicator.
                          a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)[
                'a_share_market_val'].loc[factor_exposure.index]

        if market_cap.isnull().sum() >= 30:
            raise ValueError('市值出现大量缺失')

    # Drop remaining gaps on BOTH paths. The fallback path used to keep its
    # NaN values, which then flowed into the weights and industry totals.
    market_cap = market_cap.dropna()

    root_cap = market_cap.pow(0.5)
    normalized_regression_weight = root_cap / root_cap.sum()

    # Total market cap per industry, used for the industry-return constraint.
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    # Per the original note: the 10 style factors are unconstrained, the
    # (GICS, 32) industry factors are constrained.
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    return factor_return_series.replace(np.nan, 0)