예제 #1
0
def get_last_reported_values(financial_indicator, recent_report_type):
    """Fetch ``financial_indicator`` for each stock from its own latest report.

    Stocks are grouped by report type so that a single
    ``rqdatac.get_financials`` request is issued per distinct report type;
    the per-group results are then concatenated into one object.

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        recent_report_type: pandas Series whose index entries are used as
            stock codes and whose values are passed to
            ``rqdatac.get_financials`` as the report type.

    Returns:
        The concatenated per-report-type results; an empty Series when
        ``recent_report_type`` has no entries.
    """
    # Seed with an empty Series so the result is well defined even when the
    # loop body never runs (this mirrors the original accumulator).
    pieces = [pd.Series(dtype="float64")]

    # Loop over every report type present, then merge the results.
    for report_type in recent_report_type.unique().tolist():
        stock_list = recent_report_type[recent_report_type ==
                                        report_type].index.tolist()

        values = rqdatac.get_financials(
            rqdatac.query(financial_indicator).filter(
                rqdatac.financials.stockcode.in_(stock_list)),
            report_type)

        # A single-stock result is used exactly as returned; for several
        # stocks only the first row of the result is kept.
        # NOTE(review): presumably the multi-stock result is indexed by
        # quarter with one column per stock -- confirm against rqdatac.
        if len(stock_list) != 1:
            values = values.iloc[0]

        pieces.append(values)

    # One concat instead of repeated Series.append: append re-copied the
    # accumulated data on every iteration and no longer exists in pandas 2.
    return pd.concat(pieces)
예제 #2
0
def _eps_with_fallback(primary_query, fallback_query, report_quarter):
    """Return EPS for one report quarter, preferring ``primary_query``.

    The primary result is used when it contains no nulls; otherwise the
    fallback query is issued for the same quarter.
    """
    eps = rqdatac.get_financials(primary_query, report_quarter, '1q')
    if eps.isnull().sum() == 0:
        return eps
    return rqdatac.get_financials(fallback_query, report_quarter, '1q')


def get_earnings_growth(date, year, market_cap_on_current_day):
    """Compute the earnings-growth ('EGRO') factor for qualifying stocks.

    For every stock listed before ``date`` minus 1825 days, the EPS of the
    last five annual reports is regressed on ``year``; the slope divided by
    the absolute mean EPS is the raw factor value. The raw values are then
    passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Calculation date as a 'YYYY-MM-DD' string.
        year: Regressor for the five EPS observations; must support
            ``reshape(-1, 1)``. Presumably five year values ordered newest
            first to match the EPS order -- confirm with the caller.
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the raw 'EGRO' series.
    """
    (recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)

    # Newest first: this is the order in which EPS values are regressed.
    annual_reports = [
        annual_report, annual_report_last_year, annual_report_2_year_ago,
        annual_report_3_year_ago, annual_report_4_year_ago
    ]

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    # NOTE(review): the comparison below is between listed_date and a
    # 'YYYY-MM-DD' string -- assumes listed_date uses the same format.
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    # Same content as DataFrame(index=..., columns=['EGRO'])['EGRO'] (object
    # dtype, all NaN) but written to directly, so no chained assignment that
    # would silently stop working under pandas Copy-on-Write.
    egro = pd.Series(index=growth_qualified_stocks, dtype=object, name='EGRO')

    for stock in growth_qualified_stocks:
        # Some companies revise their statements several times after
        # publication and the revised EPS ends up empty. Example:
        # '601519.XSHG', whose 2014 annual financial-indicator table lacks
        # basic EPS although the income statement has it. Hence the
        # financial_indicator table is preferred and income_statement is the
        # fallback.
        query_f = rqdatac.query(
            rqdatac.financials.financial_indicator.earnings_per_share).filter(
                rqdatac.financials.stockcode.in_([stock]))

        query_i = rqdatac.query(rqdatac.financials.income_statement.
                                basic_earnings_per_share).filter(
                                    rqdatac.financials.stockcode.in_([stock]))

        # Each year's primary query is now issued once instead of twice
        # (previously once for the null check and again for the value).
        eps = pd.Series([
            _eps_with_fallback(query_f, query_i, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), eps)
        # .item() extracts the single slope; float() on an ndarray is
        # deprecated in recent NumPy releases.
        egro[stock] = regression.coef_.item() / abs(eps.mean())

    earning_growth = winsorization_and_market_cap_weighed_standardization(
        egro, market_cap_on_current_day)

    return earning_growth
예제 #3
0
def get_recent_financial_report(date):
    """Return each stock's latest report type and latest annual report type.

    Report types are labels such as '2016q3' (first three quarters,
    cumulative) or '2016q4' (annual report).

    Args:
        date: Query date as a 'YYYY-MM-DD' string.

    Returns:
        A pair ``(recent, annual)``: the ``date`` column of the transposed
        latest-report table and of the transposed annual-report table.
    """
    last_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    last_year_annual = str(last_year) + 'q4'

    # Latest report type per stock as of `date`.
    profit_query = rqdatac.query(
        rqdatac.fundamentals.income_statement.net_profit)
    fundamentals = rqdatac.get_fundamentals(
        profit_query, entry_date=date, report_quarter=True)
    recent_report_type = fundamentals['report_quarter']

    # Work on a copy so the latest-report table itself stays untouched.
    annual_report_type = recent_report_type.copy()

    # Stocks that have not published last year's annual report fall back to
    # an earlier annual report. Which one is decided from the first label of
    # the first row of the transposed table: if it belongs to last year, the
    # fallback is the year before, otherwise last year itself.
    first_label = recent_report_type.T.iloc[0].values[0]
    if first_label[:4] == str(last_year):
        fallback_year = last_year - 1
    else:
        fallback_year = last_year

    not_latest_annual = annual_report_type != last_year_annual
    annual_report_type[not_latest_annual] = str(fallback_year) + 'q4'

    # Both tables are DataFrames; selecting the `date` column after the
    # transpose yields Series.
    return recent_report_type.T[date], annual_report_type.T[date]
예제 #4
0
def get_ttm_sum(financial_indicator, recent_report_type):
    """Compute a trailing-twelve-month (TTM) total of a financial indicator.

    When a stock's latest report is an annual report ('...q4') the annual
    value itself is used. Otherwise the TTM value is::

        latest annual report + latest report - same report one year earlier

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        recent_report_type: pandas Series mapping each stock to its latest
            report label, e.g. '2016q3'.

    Returns:
        pandas Series keyed by the row labels of the fetched data
        (presumably stock codes), holding the TTM value or NaN when no
        usable report value exists.

    Raises:
        ValueError: If a report label does not end in 'q1', 'q2', 'q3' or
            'q4'.
    """

    def _get_ttm_date(quarter):
        # Returns [same quarter last year, last year's annual, quarter];
        # the first two are NaN when `quarter` is itself an annual report.
        period = quarter[-2:]
        if period == "q4":
            return [np.nan, np.nan, quarter]
        if period in ("q1", "q2", "q3"):
            previous_year = str(int(quarter[:4]) - 1)
            return [previous_year + period, previous_year + "q4", quarter]
        # Name the offending label in the error instead of printing it and
        # raising an uninformative generic Exception.
        raise ValueError("unrecognised report quarter: %r" % (quarter,))

    # Most recent quarter label across all stocks.
    max_quarter = max(recent_report_type)
    # Last 8 reports ending at that quarter; after the transpose the
    # columns are the quarter labels.
    financial_data = rqdatac.get_financials(rqdatac.query(financial_indicator),
                                            quarter=max_quarter,
                                            interval="8q",
                                            country='cn').T

    # One row per stock holding the three report labels needed for TTM.
    effective_quarter = pd.DataFrame(
        recent_report_type.apply(_get_ttm_date).to_dict()).T

    # Flatten to one (stock, label) entry per needed report.
    effective_quarter = effective_quarter.unstack()
    effective_quarter.index = effective_quarter.index.droplevel(0)

    merged_data = pd.DataFrame(effective_quarter)
    merged_data['mask'] = 1

    # Boolean stock x quarter table: True exactly where a report is needed.
    needed = merged_data.dropna().reset_index().pivot(
        index='index', columns=0, values='mask')
    previous_quarters_mask = needed.reindex(
        columns=financial_data.columns).astype(float).replace(
            np.nan, 0).astype(bool)
    latest_data = financial_data.where(previous_quarters_mask)

    def _calc_ttm(data):
        # Sorted by label, a full row is
        # [same quarter last year, last annual, latest report].
        data = data.dropna().sort_index()
        if len(data) > 1:
            # Sum of the two latest minus the earliest; with exactly two
            # values this reduces to the later one.
            return data.iloc[-2:].sum() - data.iloc[0]
        if len(data) == 1:
            return data.iloc[0]
        return np.nan

    financial_values = {
        label: _calc_ttm(row)
        for label, row in latest_data.iterrows()
    }

    return pd.Series(financial_values)
예제 #5
0
def query(*entities):
    """Build a query over ``entities`` by delegating to ``rqdatac.query``."""
    build_query = rqdatac.query
    return build_query(*entities)
def factor_return_estimation(date, factor_exposure, industry_factors):
    """Estimate factor returns for one day by constrained weighted least squares.

    Daily excess returns of the stocks in ``factor_exposure`` are regressed
    on their factor exposures, weighted by the normalized square root of
    market capitalization. Industry factors are constrained using each
    industry's total market capitalization.

    Args:
        date: Calculation date as a 'YYYY-MM-DD' string.
        factor_exposure: DataFrame indexed by stock, one column per factor.
        industry_factors: Column labels of ``factor_exposure`` that are
            industry factors.

    Returns:
        The result of ``constrainted_weighted_least_square`` with NaN
        replaced by 0.

    Raises:
        ValueError: If 30 or more market-cap values are missing from both
            data sources.
    """
    missing_limit = 30

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    stock_ids = factor_exposure.index.tolist()

    # Close-to-close return over the latest trading day, one row per stock.
    daily_return = rqdatac.get_price(
        order_book_ids=stock_ids,
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    # Risk-free daily return: the 3M yield de-compounded over 252 periods.
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the regression weight.
    market_cap = rqdatac.get_factor(
        id_or_symbols=stock_ids,
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    if market_cap.isnull().sum() >= missing_limit:
        # Too many gaps in the factor data: fall back to fundamentals.
        market_cap = rqdatac.get_fundamentals(
            rqdatac.query(rqdatac.fundamentals.eod_derivative_indicator.
                          a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)[
                'a_share_market_val'].loc[factor_exposure.index]

        if market_cap.isnull().sum() >= missing_limit:
            raise ValueError('市值出现大量缺失')

    # Drop the remaining gaps on BOTH paths. Previously the fallback kept
    # its NaN entries, and a single NaN turns every industry total below
    # into NaN through the dot product.
    market_cap = market_cap.dropna()

    sqrt_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_cap / sqrt_cap.sum()

    # Total market cap per industry, used for the industry-return constraint.
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    # The 10 style factors are unconstrained; the industry factors
    # (32 GICS industries per the original note) are constrained.
    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    return factor_return_series.replace(np.nan, 0)
예제 #7
0
def recent_five_annual_values(financial_indicator, date, recent_report_type):
    """Collect the last five annual-report values of an indicator per stock.

    Stocks whose latest report is last year's annual report use the five
    annual reports ending with last year; all other stocks use the five
    ending one year earlier.

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        date: Calculation date as a 'YYYY-MM-DD' string.
        recent_report_type: pandas Series mapping each stock to its latest
            report label.

    Returns:
        DataFrame with columns 'first' (most recent) through 'fifth',
        published stocks first, followed by the remaining stocks.
    """
    last_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    latest_annual = str(last_year) + 'q4'

    # Stocks whose latest report is last year's annual report.
    published_stocks = recent_report_type[
        recent_report_type == latest_annual].index.tolist()

    # Everything else, obtained as a set difference.
    unpublished_stocks = list(
        set(recent_report_type.index) - set(published_stocks))

    # Five annual labels, newest first, for each of the two groups.
    published_quarters = [str(last_year - lag) + 'q4' for lag in range(5)]
    unpublished_quarters = [
        str(last_year - lag) + 'q4' for lag in range(1, 6)
    ]

    # 25 reports ending at last year's annual report; after the transpose
    # the quarter labels are the columns.
    all_reports = rqdatac.get_financials(
        rqdatac.query(financial_indicator), latest_annual, '25q').T

    published_values = all_reports[published_quarters].loc[published_stocks]
    unpublished_values = all_reports[unpublished_quarters].loc[
        unpublished_stocks]

    # Give both groups the same column labels so they can be stacked.
    ordinal_labels = ['first', 'second', 'third', 'fourth', 'fifth']
    published_values.columns = list(ordinal_labels)
    unpublished_values.columns = list(ordinal_labels)

    return pd.concat([published_values, unpublished_values], axis=0)
예제 #8
0
def _sales_per_share(stock, revenue_query, report_quarter):
    """Return revenue of ``report_quarter`` divided by total shares.

    Shares are read on the last trading day on or before 31 December of the
    report's year.
    """
    sales = rqdatac.get_financials(revenue_query, report_quarter, '1q')

    year_end = datetime.strptime(report_quarter[:4] + '-12-31', '%Y-%m-%d')
    # Asking for the trading day before 1 January yields the last trading
    # day of the report year.
    share_date = str(
        rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))

    shares = rqdatac.get_shares(
        stock, start_date=share_date, end_date=share_date, fields='total')

    return sales.values / shares.values


def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the sales-growth ('SGRO') factor for qualifying stocks.

    For every stock listed before ``date`` minus 1825 days, sales per share
    of the last five annual reports is regressed on ``year``; the slope
    divided by the mean absolute sales per share is the raw factor value.
    The raw values are then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Calculation date as a 'YYYY-MM-DD' string.
        year: Regressor for the five observations; must support
            ``reshape(-1, 1)``. Presumably five year values ordered newest
            first to match the observation order -- confirm with the caller.
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the raw 'SGRO' series.
    """
    (recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)

    # Newest first: this is the order in which observations are regressed.
    annual_reports = [
        annual_report, annual_report_last_year, annual_report_2_year_ago,
        annual_report_3_year_ago, annual_report_4_year_ago
    ]

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    # NOTE(review): the comparison below is between listed_date and a
    # 'YYYY-MM-DD' string -- assumes listed_date uses the same format.
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    # Same content as DataFrame(index=..., columns=['SGRO'])['SGRO'] (object
    # dtype, all NaN) but written to directly, so no chained assignment that
    # would silently stop working under pandas Copy-on-Write.
    sgro = pd.Series(index=growth_qualified_stocks, dtype=object, name='SGRO')

    # Sales per share for each of the past five annual reports, per stock.
    for stock in growth_qualified_stocks:
        # Named revenue_query so it does not shadow a module-level `query`.
        revenue_query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _sales_per_share(stock, revenue_query, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)
        # .item() extracts the single slope; float() on an ndarray is
        # deprecated in recent NumPy releases.
        sgro[stock] = regression.coef_.item() / abs(sales_per_share).mean()

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        sgro, market_cap_on_current_day)

    return sale_growth