예제 #1
0
    def test_get_turnover(self):
        """
        Check get_turnover over a 20 day period.

        With no transactions, the turnover should be 0 on every day.

        With two transactions per day (amounts 1 and -1, both at a price
        of 10, i.e. 20 transacted) against a non-cash position that
        alternates between 40 and 10, the expected turnover is 1.0 on the
        first day and 0.8 on every later day.

        With ``denominator="portfolio_value"`` the expected turnover
        alternates between 0.4 and 1.0.
        """
        dates = pd.date_range(start="2015-01-01", freq="D", periods=20)
        n_days = len(dates)

        # In this test, there is one sid (0) and a cash column
        positions = pd.DataFrame(
            [[10.0, 10.0]] * n_days, columns=[0, "cash"], index=dates
        )

        # Set every other non-cash position to 40. A single .loc assignment
        # is used instead of chained indexing (positions[0][::2] = 40):
        # chained assignment writes to a temporary object and leaves
        # `positions` unchanged when pandas Copy-on-Write is active.
        positions.loc[dates[::2], 0] = 40

        transactions = pd.DataFrame(
            data=[], columns=["sid", "amount", "price", "symbol"], index=dates
        )

        # Test with no transactions
        expected = pd.Series([0.0] * n_days, index=dates)
        result = get_turnover(positions, transactions).asfreq("D")
        assert_series_equal(result, expected)

        # Two transactions per date; sort_index() interleaves them so that
        # the frame is ordered by date.
        transactions = pd.DataFrame(
            data=[[1, 1, 10, 0]] * n_days + [[2, -1, 10, 0]] * n_days,
            columns=["sid", "amount", "price", "symbol"],
            index=dates.append(dates),
        ).sort_index()

        # Turnover is more on day 1, because the day 0 AGB is set to zero
        # in get_turnover. On most days, we get 0.8 because we have 20
        # transacted and mean(10, 40) = 25, so 20/25.
        expected = pd.Series([1.0] + [0.8] * (n_days - 1), index=dates)
        result = get_turnover(positions, transactions).asfreq("D")

        assert_series_equal(result, expected)

        # Test with denominator = 'portfolio_value'
        result = get_turnover(
            positions, transactions, denominator="portfolio_value"
        ).asfreq("D")

        # Our portfolio value alternates between $20 and $50 so turnover
        # should alternate between 20/20 = 1.0 and 20/50 = 0.4.
        # (n_days - 1) // 2 + 1 repetitions of the pair give one value per
        # date for the even number of dates used here.
        expected = pd.Series(
            [0.4, 1.0] * ((n_days - 1) // 2 + 1), index=dates
        )

        assert_series_equal(result, expected)
예제 #2
0
    def test_get_turnover(self):
        """
        Check get_turnover over a 20 day period.

        With no transactions, the turnover should be 0 on every day.

        With two transactions per day (amounts 1 and -1, both at a price
        of 10, i.e. 20 transacted) against a non-cash position that
        alternates between 40 and 10, the expected turnover is 1.0 on the
        first day and 0.8 on every later day.

        With ``denominator='portfolio_value'`` the expected turnover
        alternates between 0.4 and 1.0.
        """
        dates = date_range(start='2015-01-01', freq='D', periods=20)
        n_days = len(dates)

        # In this test, there is one sid (0) and a cash column
        positions = DataFrame([[10.0, 10.0]] * n_days,
                              columns=[0, 'cash'], index=dates)

        # Set every other non-cash position to 40. A single .loc assignment
        # is used instead of chained indexing (positions[0][::2] = 40):
        # chained assignment writes to a temporary object and leaves
        # `positions` unchanged when pandas Copy-on-Write is active.
        positions.loc[dates[::2], 0] = 40

        transactions = DataFrame(data=[],
                                 columns=['sid', 'amount', 'price', 'symbol'],
                                 index=dates)

        # Test with no transactions
        expected = Series([0.0] * n_days, index=dates)
        result = get_turnover(positions, transactions)
        assert_series_equal(result, expected)

        # Two transactions per date; sort_index() interleaves them so that
        # the frame is ordered by date.
        transactions = DataFrame(data=[[1, 1, 10, 0]] * n_days +
                                 [[2, -1, 10, 0]] * n_days,
                                 columns=['sid', 'amount', 'price', 'symbol'],
                                 index=dates.append(dates)).sort_index()

        # Turnover is more on day 1, because the day 0 AGB is set to zero
        # in get_turnover. On most days, we get 0.8 because we have 20
        # transacted and mean(10, 40) = 25, so 20/25.
        expected = Series([1.0] + [0.8] * (n_days - 1), index=dates)
        result = get_turnover(positions, transactions)

        assert_series_equal(result, expected)

        # Test with denominator = 'portfolio_value'
        result = get_turnover(positions, transactions,
                              denominator='portfolio_value')

        # Our portfolio value alternates between $20 and $50 so turnover
        # should alternate between 20/20 = 1.0 and 20/50 = 0.4.
        # (n_days - 1) // 2 + 1 repetitions of the pair give one value per
        # date for the even number of dates used here.
        expected = Series([0.4, 1.0] * ((n_days - 1) // 2 + 1),
                          index=dates)

        assert_series_equal(result, expected)
예제 #3
0
    def slippage_sensitivity(self,
                             returns,
                             transactions,
                             positions,
                             slippage_range=range(100, 5000, 100)):
        """
        Curve relating per-dollar slippage to average annual returns.

        Parameters
        ----------
        returns : pd.Series
            Timeseries of portfolio returns to be adjusted for various
            degrees of slippage.
        transactions : pd.DataFrame
            Prices and amounts of executed trades. One row per trade.
             - See full explanation in tears.create_full_tear_sheet.
        positions : pd.DataFrame
            Daily net position values.
             - See full explanation in tears.create_full_tear_sheet.
        slippage_range : iterable, optional
            Slippage levels to evaluate. Each value is handed to
            ``txn.adjust_returns_for_slippage`` as its slippage argument
            (named ``bps`` here, i.e. presumably basis points).
            Defaults to ``range(100, 5000, 100)``.

        Returns
        -------
        avg_returns_given_slippage : pd.Series
            ``empyrical.annual_return`` of the slippage-adjusted returns,
            indexed by the slippage level. Empty when ``slippage_range``
            is empty.
        """
        turnover = txn.get_turnover(positions,
                                    transactions,
                                    period=None,
                                    average=False)

        # Collect the results in a dict and build the Series once, with an
        # explicit float dtype. Growing a dtype-less ``pd.Series()`` through
        # ``.loc`` re-allocates on every insertion and relies on the default
        # dtype of an empty Series, which differs between pandas versions.
        annual_return_by_bps = {
            bps: empyrical.annual_return(
                txn.adjust_returns_for_slippage(returns, turnover, bps))
            for bps in slippage_range
        }
        return pd.Series(annual_return_by_bps, dtype="float64")
예제 #4
0
    def turnover(self, transactions, positions):
        """
        Turnover as a function of date.

        Turnover is the number of shares traded for a period as a fraction
        of total shares.

        Reports the series returned by ``txn.get_turnover``, its mean per
        month, and its mean over the whole series.

        Parameters
        ----------
        transactions : pd.DataFrame
            Prices and amounts of executed trades. One row per trade.
             - See full explanation in tears.create_full_tear_sheet.
        positions : pd.DataFrame
            Daily net position values.
             - See full explanation in tears.create_full_tear_sheet.

        Returns
        -------
        dict
            ``"Daily"`` holds the turnover series, ``"Monthly"`` its
            ``resample("M").mean()`` and ``"Mean"`` its overall mean.
        """
        per_day = txn.get_turnover(positions, transactions)
        per_month = per_day.resample("M").mean()
        overall = per_day.mean()
        return dict(Daily=per_day, Monthly=per_month, Mean=overall)
예제 #5
0
    def test_get_turnover(self):
        """
        Exercise get_turnover on 20 daily observations.

        Without transactions the turnover is expected to be 0, both per
        day and per month.

        With 100% of the portfolio value traded each day the expected
        daily turnover rate is 0.5.

        The expected monthly turnover is the sum of the daily turnovers,
        because the 20 days all fall within one month:
        (20 days) * (0.5 daily turnover) = 10x monthly turnover rate.
        """
        days = date_range(start='2015-01-01', freq='D', periods=20)
        n_days = len(days)
        txn_columns = ['sid', 'amount', 'price', 'symbol']

        # One sid (0) with no holdings, plus 10 in cash on every day
        positions = DataFrame(
            [[0.0, 10.0]] * n_days,
            index=days,
            columns=[0, 'cash'],
        )

        # --- no transactions -------------------------------------------
        transactions = DataFrame(data=[], index=days, columns=txn_columns)

        expected = Series([0.0] * n_days, index=days)
        result = get_turnover(positions, transactions)
        assert_series_equal(result, expected)

        # Same input, monthly frequency: a single period is expected
        index = date_range('01-01-2015', freq='M', periods=1)
        expected = Series([0.0], index=index)
        result = get_turnover(positions, transactions, period='M')
        assert_series_equal(result, expected)

        # --- one transaction of amount 1 at price 10 per day ------------
        transactions = DataFrame(
            data=[[1, 1, 10, 'A']] * n_days,
            index=days,
            columns=txn_columns,
        )

        expected = Series([0.5] * n_days, index=days)
        result = get_turnover(positions, transactions)
        assert_series_equal(result, expected)

        # Monthly freq: should be the sum of the daily freq
        result = get_turnover(positions, transactions, period='M')
        expected = Series([10.0], index=index)
        assert_series_equal(result, expected)
예제 #6
0
    def test_get_turnover(self):
        """
        Exercise get_turnover on 20 daily observations.

        Without transactions the turnover is expected to be 0, both per
        day and per month.

        With 100% of the portfolio value traded each day the expected
        daily turnover rate is 0.5.

        The expected monthly turnover is the sum of the daily turnovers,
        because the 20 days all fall within one month:
        (20 days) * (0.5 daily turnover) = 10x monthly turnover rate.
        """
        days = date_range(start='2015-01-01', freq='D', periods=20)
        n_days = len(days)
        txn_columns = ['txn_volume', 'txn_shares']

        # One sid (0) with no holdings, plus 10 in cash on every day
        positions = DataFrame(
            [[0.0, 10.0]] * n_days,
            index=days,
            columns=[0, 'cash'],
        )

        # --- zero transaction volume -------------------------------------
        transactions = DataFrame(
            [[0, 0]] * n_days,
            index=days,
            columns=txn_columns,
        )

        expected = Series([0.0] * n_days, index=days)
        result = get_turnover(transactions, positions)
        assert_series_equal(result, expected)

        # Same input, monthly frequency: a single period is expected
        index = date_range('01-01-2015', freq='M', periods=1)
        expected = Series([0.0], index=index)
        result = get_turnover(transactions, positions, period='M')
        assert_series_equal(result, expected)

        # --- transaction volume of 10 per day: 0.5 daily turnover --------
        transactions = DataFrame(
            [[10.0, 0]] * n_days,
            index=days,
            columns=txn_columns,
        )

        expected = Series([0.5] * n_days, index=days)
        result = get_turnover(transactions, positions)
        assert_series_equal(result, expected)

        # Monthly freq: should be the sum of the daily freq
        result = get_turnover(transactions, positions, period='M')
        expected = Series([10.0], index=index)
        assert_series_equal(result, expected)
예제 #7
0
    def test_adjust_returns_for_slippage(self):
        """
        Check adjust_returns_for_slippage on 20 days of constant returns.

        A return of 0.05 per day with a slippage setting of 10 is expected
        to become 0.049 per day, given the turnover computed by
        ``get_turnover(..., average=False)`` for a transaction volume of 10
        against 10 in cash.
        """
        days = date_range(start='2015-01-01', freq='D', periods=20)
        n_days = len(days)

        positions = DataFrame(
            [[0.0, 10.0]] * n_days,
            index=days,
            columns=[0, 'cash'],
        )

        # 100% total, 50% average daily turnover
        transactions = DataFrame(
            [[10.0, 0]] * n_days,
            index=days,
            columns=['txn_volume', 'txn_shares'],
        )
        returns = Series([0.05] * n_days, index=days)

        # 10 basis points = 0.001 (0.1%) slippage per dollar traded; the
        # expected adjusted return is the 0.05 return reduced by 0.001.
        slippage_bps = 10
        expected = Series([0.049] * n_days, index=days)

        turnover = get_turnover(transactions, positions, average=False)
        adjusted = adjust_returns_for_slippage(returns, turnover, slippage_bps)

        assert_series_equal(adjusted, expected)
예제 #8
0
    def slippage_sweep(self,
                       returns,
                       transactions,
                       positions,
                       slippage_params=(100, 200, 300, 400, 500)):
        """
        Equity curves at different per-dollar slippage assumptions.

        Note: 1 bps = 0.0001. For A-shares one point is 0.01, i.e. 100 bps.

        Parameters
        ----------
        returns : pd.Series
            Timeseries of portfolio returns to be adjusted for various
            degrees of slippage.
        transactions : pd.DataFrame
            Prices and amounts of executed trades. One row per trade.
             - See full explanation in tears.create_full_tear_sheet.
        positions : pd.DataFrame
            Daily net position values.
             - See full explanation in tears.create_full_tear_sheet.
        slippage_params: tuple
            Slippage parameters to apply to the return time series (in
            basis points).

        Returns
        -------
        pd.DataFrame
            One column per entry of ``slippage_params``, labelled
            ``"<value> bps"``, holding ``empyrical.cum_returns`` of the
            slippage-adjusted returns.
        """
        turnover_series = txn.get_turnover(
            positions, transactions, period=None, average=False)

        curves = pd.DataFrame()
        for bps in slippage_params:
            column_name = "{!s} bps".format(bps)
            slipped_returns = txn.adjust_returns_for_slippage(
                returns, turnover_series, bps)
            # The second positional argument (1) is presumably the starting
            # value of the cumulative curve -- confirm against empyrical.
            curves[column_name] = empyrical.cum_returns(slipped_returns, 1)

        return curves
예제 #9
0
    def test_adjust_returns_for_slippage(self):
        """
        Check adjust_returns_for_slippage on 20 days of constant returns.

        A return of 0.05 per day with a slippage setting of 10 is expected
        to become 0.049 per day, given the turnover computed by
        ``get_turnover(..., average=False)`` for a transaction volume of 10
        against 10 in cash.
        """
        days = date_range(start='2015-01-01', freq='D', periods=20)
        n_days = len(days)

        positions = DataFrame(
            [[0.0, 10.0]] * n_days,
            index=days,
            columns=[0, 'cash'],
        )

        # 100% total, 50% average daily turnover
        transactions = DataFrame(
            [[10.0, 0]] * n_days,
            index=days,
            columns=['txn_volume', 'txn_shares'],
        )
        returns = Series([0.05] * n_days, index=days)

        # 10 basis points = 0.001 (0.1%) slippage per dollar traded; the
        # expected adjusted return is the 0.05 return reduced by 0.001.
        slippage_bps = 10
        expected = Series([0.049] * n_days, index=days)

        turnover = get_turnover(transactions, positions, average=False)
        adjusted = adjust_returns_for_slippage(returns, turnover, slippage_bps)

        assert_series_equal(adjusted, expected)
예제 #10
0
def _align_and_warn(returns,
                    positions,
                    factor_returns,
                    factor_loadings,
                    transactions=None,
                    pos_in_dollars=True):
    """
    Make sure that all inputs have matching dates and tickers,
    and raise warnings if necessary.

    Position columns without factor loadings are dropped from
    ``positions``. Dates without factor loadings are dropped from
    ``positions``, ``returns`` and ``factor_returns``. A warning is issued
    in each of these cases, and another one when the mean of
    ``get_turnover(positions, transactions)`` exceeds
    ``PERF_ATTRIB_TURNOVER_THRESHOLD``.

    Parameters
    ----------
    returns : pd.Series
        Returns, indexed by date.
    positions : pd.DataFrame
        Holdings indexed by date, with one column per asset and a 'cash'
        column.
    factor_returns : pd.DataFrame
        Factor returns, indexed by date.
    factor_loadings : pd.DataFrame
        Factor loadings with a two-level index: level 0 is compared with
        the index of ``positions``, level 1 with its columns.
    transactions : pd.DataFrame, optional
        Passed to ``get_turnover`` for the turnover check. When None, the
        check is skipped.
    pos_in_dollars : bool
        The turnover check is only performed when this is True.

    Returns
    -------
    tuple of (returns, positions, factor_returns, factor_loadings)
        ``factor_loadings`` is returned as passed in; the other three have
        the uncovered columns / dates removed.

    Raises
    ------
    ValueError
        If none of the non-cash columns of ``positions`` has factor
        loadings, including the case where 'cash' is the only column.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique())

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Checked before any ratio is computed: with 'cash' as the only column
    # num_stocks is 0, and dividing by it would raise ZeroDivisionError
    # instead of this descriptive error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:

        # Only needed for the warning text. At least one non-cash column
        # exists in this branch, so num_stocks is non-zero.
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:

            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"

        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        # The average allocation is reported for the first five missing
        # assets plus the last one, matching the names displayed above.
        displayed_assets = missing_stocks[:5].union(missing_stocks[[-1]])

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n").format(
                missing_stocks_displayed,
                missing_ratio,
                avg_allocation_msg,
                positions[displayed_assets].mean(),
            )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks,
                                   axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique())

    if len(missing_factor_loadings_index) > 0:

        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})").format(
                    missing_factor_loadings_index[0],
                    missing_factor_loadings_index[-1])
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. ").format(
                len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index, errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n")
            warnings.warn(warning_msg)

    return (returns, positions, factor_returns, factor_loadings)
예제 #11
0
def perf_attrib(returns,
                positions,
                factor_returns,
                factor_loadings,
                transactions=None,
                pos_in_dollars=True):
    """
    Does performance attribution given risk info.

    Parameters
    ----------
    returns : pd.Series
        Returns for each day in the date range.
        - Example:
            2017-01-01   -0.017098
            2017-01-02    0.002683
            2017-01-03   -0.008669

    positions: pd.DataFrame
        Daily holdings (in dollars or percentages), indexed by date.
        Will be converted to percentages if positions are in dollars.
        Short positions show up as cash in the 'cash' column.
        - Examples:
                        AAPL  TLT  XOM  cash
            2017-01-01    34   58   10     0
            2017-01-02    22   77   18     0
            2017-01-03   -15   27   30    15

                            AAPL       TLT       XOM  cash
            2017-01-01  0.333333  0.568627  0.098039   0.0
            2017-01-02  0.188034  0.658120  0.153846   0.0
            2017-01-03  0.208333  0.375000  0.416667   0.0

    factor_returns : pd.DataFrame
        Returns by factor, with date as index and factors as columns
        - Example:
                        momentum  reversal
            2017-01-01  0.002779 -0.005453
            2017-01-02  0.001096  0.010290

    factor_loadings : pd.DataFrame
        Factor loadings for all days in the date range, with date and ticker as
        index, and factors as columns.
        - Example:
                               momentum  reversal
            dt         ticker
            2017-01-01 AAPL   -1.592914  0.852830
                       TLT     0.184864  0.895534
                       XOM     0.993160  1.149353
            2017-01-02 AAPL   -0.140009 -0.524952
                       TLT    -1.066978  0.185435
                       XOM    -1.798401  0.761549

    transactions : pd.DataFrame, optional
        Executed trade volumes and fill prices. Used to check the turnover of
        the algorithm. Default is None, in which case the turnover check is
        skipped.

        - One row per trade.
        - Trades on different names that occur at the
          same time will have identical indices.
        - Example:
            index                  amount   price    symbol
            2004-01-09 12:18:01    483      324.12   'AAPL'
            2004-01-09 12:18:01    122      83.10    'MSFT'
            2004-01-13 14:12:23    -75      340.43   'AAPL'

    pos_in_dollars : bool
        Flag indicating whether `positions` are in dollars or percentages
        If True, positions are in dollars.

    Returns
    -------
    tuple of (risk_exposures_portfolio, perf_attribution)

    risk_exposures_portfolio : pd.DataFrame
        df indexed by datetime, with factors as columns
        - Example:
                        momentum  reversal
            dt
            2017-01-01 -0.238655  0.077123
            2017-01-02  0.821872  1.520515

    perf_attribution : pd.DataFrame
        df with factors, total returns, common returns, and specific
        returns as columns, and datetimes as index. The ``total_returns``
        column holds ``returns`` and is omitted from the example below.
        - Example:
                        momentum  reversal  common_returns  specific_returns
            dt
            2017-01-01  0.249087  0.935925        1.185012          1.185012
            2017-01-02 -0.003194 -0.400786       -0.403980         -0.403980

    Raises
    ------
    ValueError
        If none of the non-cash columns of ``positions`` has factor
        loadings, including the case where 'cash' is the only column.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique())

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Checked before any ratio is computed: with 'cash' as the only column
    # num_stocks is 0, and dividing by it would raise ZeroDivisionError
    # instead of this descriptive error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:

        # Only needed for the warning text. At least one non-cash column
        # exists in this branch, so num_stocks is non-zero.
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:

            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"

        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        # The average allocation is reported for the first five missing
        # assets plus the last one, matching the names displayed above.
        displayed_assets = missing_stocks[:5].union(missing_stocks[[-1]])

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n").format(
                missing_stocks_displayed,
                missing_ratio,
                avg_allocation_msg,
                positions[displayed_assets].mean(),
            )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks,
                                   axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique())

    if len(missing_factor_loadings_index) > 0:

        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})").format(
                    missing_factor_loadings_index[0],
                    missing_factor_loadings_index[-1])
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. ").format(
                len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index, errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n")
            warnings.warn(warning_msg)

    # Note that we convert positions to percentages *after* the checks
    # above, since get_turnover() expects positions in dollars.
    if pos_in_dollars:
        # convert holdings to percentages
        positions = get_percent_alloc(positions)

    # remove cash after normalizing positions
    positions = positions.drop('cash', axis='columns')

    # convert positions to long format
    positions = positions.stack()
    positions.index = positions.index.set_names(['dt', 'ticker'])

    # Weight each asset's loadings by its allocation, then add the weighted
    # loadings up per date to obtain the portfolio-level exposures.
    risk_exposures = factor_loadings.multiply(positions, axis='rows')

    risk_exposures_portfolio = risk_exposures.groupby(level='dt').sum()
    perf_attrib_by_factor = risk_exposures_portfolio.multiply(factor_returns)

    # Common returns are the part explained by the factors; whatever is left
    # of the total returns is reported as specific returns.
    common_returns = perf_attrib_by_factor.sum(axis='columns')
    specific_returns = returns - common_returns

    returns_df = pd.DataFrame({
        'total_returns': returns,
        'common_returns': common_returns,
        'specific_returns': specific_returns
    })

    return (risk_exposures_portfolio,
            pd.concat([perf_attrib_by_factor, returns_df], axis='columns'))
예제 #12
0
def _align_and_warn(returns,
                    positions,
                    factor_returns,
                    factor_loadings,
                    transactions=None,
                    pos_in_dollars=True):
    """
    Make sure that all inputs have matching dates and tickers,
    and raise warnings if necessary.

    Position columns without factor loadings are dropped from
    ``positions``. Dates without factor loadings are dropped from
    ``positions``, ``returns`` and ``factor_returns``. A warning is issued
    in each of these cases, and another one when the mean of
    ``get_turnover(positions, transactions)`` exceeds
    ``PERF_ATTRIB_TURNOVER_THRESHOLD``.

    Parameters
    ----------
    returns : pd.Series
        Returns, indexed by date.
    positions : pd.DataFrame
        Holdings indexed by date, with one column per asset and a 'cash'
        column.
    factor_returns : pd.DataFrame
        Factor returns, indexed by date.
    factor_loadings : pd.DataFrame
        Factor loadings with a two-level index: level 0 is compared with
        the index of ``positions``, level 1 with its columns.
    transactions : pd.DataFrame, optional
        Passed to ``get_turnover`` for the turnover check. When None, the
        check is skipped.
    pos_in_dollars : bool
        The turnover check is only performed when this is True.

    Returns
    -------
    tuple of (returns, positions, factor_returns, factor_loadings)
        ``factor_loadings`` is returned as passed in; the other three have
        the uncovered columns / dates removed.

    Raises
    ------
    ValueError
        If none of the non-cash columns of ``positions`` has factor
        loadings, including the case where 'cash' is the only column.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique()
    )

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Checked before any ratio is computed: with 'cash' as the only column
    # num_stocks is 0, and dividing by it would raise ZeroDivisionError
    # instead of this descriptive error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:

        # Only needed for the warning text. At least one non-cash column
        # exists in this branch, so num_stocks is non-zero.
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:

            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"

        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        # The average allocation is reported for the first five missing
        # assets plus the last one, matching the names displayed above.
        displayed_assets = missing_stocks[:5].union(missing_stocks[[-1]])

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            positions[displayed_assets].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique()
    )

    if len(missing_factor_loadings_index) > 0:

        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(
                missing_factor_loadings_index[0],
                missing_factor_loadings_index[-1]
            )
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index, errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    return (returns, positions, factor_returns, factor_loadings)
예제 #13
0
def perf_attrib(returns,
                positions,
                factor_returns,
                factor_loadings,
                transactions=None,
                pos_in_dollars=True):
    """
    Does performance attribution given risk info.

    Positions are first reconciled against `factor_loadings`: assets with
    no loadings are dropped (with a warning), and dates with no loadings
    are dropped from `positions`, `returns` and `factor_returns` (with a
    warning). Portfolio-level factor exposures are then computed from the
    remaining holdings, and returns are split into a per-factor part, a
    common part (sum over factors) and a specific part (the remainder).

    Parameters
    ----------
    returns : pd.Series
        Returns for each day in the date range.
        - Example:
            2017-01-01   -0.017098
            2017-01-02    0.002683
            2017-01-03   -0.008669

    positions: pd.DataFrame
        Daily holdings (in dollars or percentages), indexed by date.
        Will be converted to percentages if positions are in dollars.
        Short positions show up as cash in the 'cash' column.
        A 'cash' column is required.
        - Examples:
                        AAPL  TLT  XOM  cash
            2017-01-01    34   58   10     0
            2017-01-02    22   77   18     0
            2017-01-03   -15   27   30    15

                            AAPL       TLT       XOM  cash
            2017-01-01  0.333333  0.568627  0.098039   0.0
            2017-01-02  0.188034  0.658120  0.153846   0.0
            2017-01-03  0.208333  0.375000  0.416667   0.0

    factor_returns : pd.DataFrame
        Returns by factor, with date as index and factors as columns
        - Example:
                        momentum  reversal
            2017-01-01  0.002779 -0.005453
            2017-01-02  0.001096  0.010290

    factor_loadings : pd.DataFrame
        Factor loadings for all days in the date range, with date and ticker as
        index, and factors as columns.
        - Example:
                               momentum  reversal
            dt         ticker
            2017-01-01 AAPL   -1.592914  0.852830
                       TLT     0.184864  0.895534
                       XOM     0.993160  1.149353
            2017-01-02 AAPL   -0.140009 -0.524952
                       TLT    -1.066978  0.185435
                       XOM    -1.798401  0.761549

    transactions : pd.DataFrame, optional
        Executed trade volumes and fill prices. Used to check the turnover of
        the algorithm. Default is None, in which case the turnover check is
        skipped. The check is also skipped when `pos_in_dollars` is False.

        - One row per trade.
        - Trades on different names that occur at the
          same time will have identical indices.
        - Example:
            index                  amount   price    symbol
            2004-01-09 12:18:01    483      324.12   'AAPL'
            2004-01-09 12:18:01    122      83.10    'MSFT'
            2004-01-13 14:12:23    -75      340.43   'AAPL'

    pos_in_dollars : bool
        Flag indicating whether `positions` are in dollars or percentages
        If True, positions are in dollars.

    Returns
    -------
    tuple of (risk_exposures_portfolio, perf_attribution)

    risk_exposures_portfolio : pd.DataFrame
        df indexed by datetime, with factors as columns
        - Example:
                        momentum  reversal
            dt
            2017-01-01 -0.238655  0.077123
            2017-01-02  0.821872  1.520515

    perf_attribution : pd.DataFrame
        df with one column per factor, followed by 'total_returns',
        'common_returns' and 'specific_returns', and datetimes as index.
        'common_returns' is the sum of the factor columns and
        'specific_returns' is `returns` minus 'common_returns'.
        - Example (factor columns omitted for width):
                        total_returns  common_returns  specific_returns
            dt
            2017-01-01       2.370024        1.185012          1.185012
            2017-01-02      -0.807960       -0.403980         -0.403980

    Raises
    ------
    ValueError
        If none of the non-cash assets in `positions` has factor loadings
        (including the case where `positions` holds only a 'cash' column).
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique()
    )

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Validate coverage before computing the ratio below: with a cash-only
    # `positions` frame num_stocks is 0 and the division would otherwise
    # fail with ZeroDivisionError instead of this descriptive error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    # num_stocks >= 1 here, because at least one stock is covered.
    missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

    if len(missing_stocks) > 0:

        if len(missing_stocks) > 5:
            # Too many to list: show the first five and the last one.
            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"

        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            # Mean allocation of the same assets that are named in the
            # message (first five plus the last one).
            positions[missing_stocks[:5].union(missing_stocks[[-1]])].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    # Dates present in positions but absent from the loadings' date level.
    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique()
    )

    if len(missing_factor_loadings_index) > 0:

        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(
                missing_factor_loadings_index[0],
                missing_factor_loadings_index[-1]
            )
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        # errors='ignore' because returns / factor_returns need not contain
        # every date that positions does.
        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index, errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    # Note that we convert positions to percentages *after* the checks
    # above, since get_turnover() expects positions in dollars.
    if pos_in_dollars:
        # convert holdings to percentages
        positions = get_percent_alloc(positions)

    # remove cash after normalizing positions
    positions = positions.drop('cash', axis='columns')

    # convert positions to long format so the index lines up with the
    # (date, ticker) index of factor_loadings
    positions = positions.stack()
    positions.index = positions.index.set_names(['dt', 'ticker'])

    # Per-asset exposure = loading * weight; summing over assets for each
    # date gives the portfolio-level exposure to each factor.
    risk_exposures = factor_loadings.multiply(positions,
                                              axis='rows')

    risk_exposures_portfolio = risk_exposures.groupby(level='dt').sum()
    perf_attrib_by_factor = risk_exposures_portfolio.multiply(factor_returns)

    common_returns = perf_attrib_by_factor.sum(axis='columns')
    specific_returns = returns - common_returns

    returns_df = pd.DataFrame({'total_returns': returns,
                               'common_returns': common_returns,
                               'specific_returns': specific_returns})

    return (risk_exposures_portfolio,
            pd.concat([perf_attrib_by_factor, returns_df], axis='columns'))