def test_get_turnover(self):
    """
    Tests turnover using a 20 day period.

    With no transactions, the turnover should be 0.

    With one buy and one sell of 1 share at $10 every day ($20 traded),
    the expected daily turnover is 0.8 against the default denominator
    (1.0 on the first day) and alternates between 0.4 and 1.0 when
    ``denominator="portfolio_value"``.
    """
    dates = pd.date_range(start="2015-01-01", freq="D", periods=20)
    txn_columns = ["sid", "amount", "price", "symbol"]

    # In this test, there is one sid (0) and a cash column
    positions = pd.DataFrame(
        [[10.0, 10.0]] * len(dates), columns=[0, "cash"], index=dates
    )

    # Set every other non-cash position to 40. A single .loc assignment is
    # used instead of chained indexing (positions[0][::2] = 40), which may
    # write to a temporary copy and leave `positions` untouched.
    positions.loc[dates[::2], 0] = 40

    transactions = pd.DataFrame(data=[], columns=txn_columns, index=dates)

    # Test with no transactions
    expected = pd.Series([0.0] * len(dates), index=dates)
    result = get_turnover(positions, transactions).asfreq("D")
    assert_series_equal(result, expected)

    transactions = pd.DataFrame(
        data=[[1, 1, 10, 0]] * len(dates) + [[2, -1, 10, 0]] * len(dates),
        columns=txn_columns,
        index=dates.append(dates),
    ).sort_index()

    # Turnover is more on day 1, because the day 0 AGB is set to zero
    # in get_turnover. On most days, we get 0.8 because we have 20
    # transacted and mean(10, 40) = 25, so 20/25.
    expected = pd.Series([1.0] + [0.8] * (len(dates) - 1), index=dates)
    result = get_turnover(positions, transactions).asfreq("D")
    assert_series_equal(result, expected)

    # Test with denominator = 'portfolio_value'
    result = get_turnover(
        positions, transactions, denominator="portfolio_value"
    ).asfreq("D")

    # Our portfolio value alternates between $20 and $50 so turnover
    # should alternate between 20/20 = 1.0 and 20/50 = 0.4.
    expected = pd.Series(
        [0.4, 1.0] * (int((len(dates) - 1) / 2) + 1), index=dates
    )
    assert_series_equal(result, expected)
def test_get_turnover(self):
    """
    Tests turnover using a 20 day period.

    With no transactions, the turnover should be 0.

    With one buy and one sell of 1 share at $10 every day ($20 traded),
    the expected daily turnover is 0.8 against the default denominator
    (1.0 on the first day) and alternates between 0.4 and 1.0 when
    ``denominator='portfolio_value'``.
    """
    dates = date_range(start='2015-01-01', freq='D', periods=20)
    txn_columns = ['sid', 'amount', 'price', 'symbol']

    # In this test, there is one sid (0) and a cash column
    positions = DataFrame([[10.0, 10.0]] * len(dates),
                          columns=[0, 'cash'], index=dates)

    # Set every other non-cash position to 40. A single .loc assignment is
    # used instead of chained indexing (positions[0][::2] = 40), which may
    # write to a temporary copy and leave `positions` untouched.
    positions.loc[dates[::2], 0] = 40

    transactions = DataFrame(data=[], columns=txn_columns, index=dates)

    # Test with no transactions
    expected = Series([0.0] * len(dates), index=dates)
    result = get_turnover(positions, transactions)
    assert_series_equal(result, expected)

    transactions = DataFrame(
        data=[[1, 1, 10, 0]] * len(dates) + [[2, -1, 10, 0]] * len(dates),
        columns=txn_columns,
        index=dates.append(dates),
    ).sort_index()

    # Turnover is more on day 1, because the day 0 AGB is set to zero
    # in get_turnover. On most days, we get 0.8 because we have 20
    # transacted and mean(10, 40) = 25, so 20/25.
    expected = Series([1.0] + [0.8] * (len(dates) - 1), index=dates)
    result = get_turnover(positions, transactions)
    assert_series_equal(result, expected)

    # Test with denominator = 'portfolio_value'
    result = get_turnover(positions, transactions,
                          denominator='portfolio_value')

    # Our portfolio value alternates between $20 and $50 so turnover
    # should alternate between 20/20 = 1.0 and 20/50 = 0.4.
    expected = Series([0.4, 1.0] * (int((len(dates) - 1) / 2) + 1),
                      index=dates)
    assert_series_equal(result, expected)
def slippage_sensitivity(self, returns, transactions, positions,
                         slippage_range=range(100, 5000, 100)):
    """
    Curve relating per-dollar slippage to average annual returns.

    Parameters
    ----------
    returns : pd.Series
        Timeseries of portfolio returns to be adjusted for various
        degrees of slippage.
    transactions : pd.DataFrame
        Prices and amounts of executed trades. One row per trade.
         - See full explanation in tears.create_full_tear_sheet.
    positions : pd.DataFrame
        Daily net position values.
         - See full explanation in tears.create_full_tear_sheet.
    slippage_range : iterable of int, optional
        Slippage levels to evaluate; each value is passed unchanged to
        ``txn.adjust_returns_for_slippage``. Defaults to
        ``range(100, 5000, 100)``.

    Returns
    -------
    avg_returns_given_slippage : pd.Series
        Annual return (``empyrical.annual_return``) of the
        slippage-adjusted returns, indexed by slippage level.
    """
    turnover = txn.get_turnover(positions, transactions,
                                period=None, average=False)

    # Build the whole series in one step with an explicit float dtype.
    # A bare ``pd.Series()`` is deprecated/object-typed, and enlarging it
    # one ``.loc`` assignment at a time re-allocates on every iteration.
    avg_returns_given_slippage = pd.Series(
        {
            bps: empyrical.annual_return(
                txn.adjust_returns_for_slippage(returns, turnover, bps)
            )
            for bps in slippage_range
        },
        dtype=float,
    )

    return avg_returns_given_slippage
def turnover(self, transactions, positions):
    """
    Turnover vs. date.

    Turnover is the number of shares traded for a period as a fraction
    of total shares. Provides the daily series, its monthly mean, and
    its all-time mean.

    Parameters
    ----------
    transactions : pd.DataFrame
        Prices and amounts of executed trades. One row per trade.
         - See full explanation in tears.create_full_tear_sheet.
    positions : pd.DataFrame
        Daily net position values.
         - See full explanation in tears.create_full_tear_sheet.

    Returns
    -------
    dict
        ``"Daily"``: the result of ``txn.get_turnover``;
        ``"Monthly"``: that result resampled to ``"M"`` and averaged;
        ``"Mean"``: the mean of the daily result.
    """
    daily = txn.get_turnover(positions, transactions)
    monthly_mean = daily.resample("M").mean()
    overall_mean = daily.mean()

    return {"Daily": daily, "Monthly": monthly_mean, "Mean": overall_mean}
def test_get_turnover(self):
    """
    Tests turnover using a 20 day period.

    With no transactions the turnover should be 0.

    With 100% of the portfolio value traded each day the daily
    turnover rate should be 0.5.

    For monthly turnover it should be the sum of the daily turnovers
    because 20 days < 1 month.

    e.g (20 days) * (0.5 daily turn) = 10x monthly turnover rate.
    """
    dates = date_range(start='2015-01-01', freq='D', periods=20)
    month_end = date_range('01-01-2015', freq='M', periods=1)
    n_days = len(dates)
    txn_columns = ['sid', 'amount', 'price', 'symbol']

    positions = DataFrame([[0.0, 10.0]] * n_days,
                          columns=[0, 'cash'], index=dates)

    # Case 1: no transactions at all.
    no_trades = DataFrame(data=[], columns=txn_columns, index=dates)

    daily = get_turnover(positions, no_trades)
    assert_series_equal(daily, Series([0.0] * n_days, index=dates))

    # Same fixture at monthly frequency.
    monthly = get_turnover(positions, no_trades, period='M')
    assert_series_equal(monthly, Series([0.0], index=month_end))

    # Case 2: one trade of 1 share at a price of 10 on every day.
    daily_trades = DataFrame(data=[[1, 1, 10, 'A']] * n_days,
                             columns=txn_columns, index=dates)

    daily = get_turnover(positions, daily_trades)
    assert_series_equal(daily, Series([0.5] * n_days, index=dates))

    # Monthly freq: should be the sum of the daily freq
    monthly = get_turnover(positions, daily_trades, period='M')
    assert_series_equal(monthly, Series([10.0], index=month_end))
def test_get_turnover(self):
    """
    Tests turnover using a 20 day period.

    With no transactions the turnover should be 0.

    With 100% of the portfolio value traded each day the daily
    turnover rate should be 0.5.

    For monthly turnover it should be the sum of the daily turnovers
    because 20 days < 1 month.

    e.g (20 days) * (0.5 daily turn) = 10x monthly turnover rate.
    """
    dates = date_range(start='2015-01-01', freq='D', periods=20)
    month_end = date_range('01-01-2015', freq='M', periods=1)
    n_days = len(dates)
    txn_columns = ['txn_volume', 'txn_shares']

    positions = DataFrame([[0.0, 10.0]] * n_days,
                          columns=[0, 'cash'], index=dates)

    # Case 1: zero volume and zero shares on every day.
    idle = DataFrame([[0, 0]] * n_days, columns=txn_columns, index=dates)

    daily = get_turnover(idle, positions)
    assert_series_equal(daily, Series([0.0] * n_days, index=dates))

    # Same fixture at monthly frequency.
    monthly = get_turnover(idle, positions, period='M')
    assert_series_equal(monthly, Series([0.0], index=month_end))

    # Case 2: a transaction volume of 10.0 on every day, expected to
    # give 0.5 daily turnover.
    active = DataFrame([[10.0, 0]] * n_days,
                       columns=txn_columns, index=dates)

    daily = get_turnover(active, positions)
    assert_series_equal(daily, Series([0.5] * n_days, index=dates))

    # Monthly freq: should be the sum of the daily freq
    monthly = get_turnover(active, positions, period='M')
    assert_series_equal(monthly, Series([10.0], index=month_end))
def test_adjust_returns_for_slippage(self):
    """
    A flat 5% daily return series adjusted for 10 bps of slippage,
    using the non-averaged turnover of the fixture below, is expected
    to come out as a flat 4.9% series.
    """
    dates = date_range(start='2015-01-01', freq='D', periods=20)
    n_days = len(dates)

    positions = DataFrame([[0.0, 10.0]] * n_days,
                          columns=[0, 'cash'], index=dates)

    # 100% total, 50% average daily turnover
    transactions = DataFrame([[10.0, 0]] * n_days,
                             columns=['txn_volume', 'txn_shares'],
                             index=dates)

    raw_returns = Series([0.05] * n_days, index=dates)

    # 10 bps, i.e. 0.1% (0.001 as a fraction); the expected series below
    # is 0.05 - 0.001 on every day.
    slippage_bps = 10
    expected = Series([0.049] * n_days, index=dates)

    daily_turnover = get_turnover(transactions, positions, average=False)
    adjusted = adjust_returns_for_slippage(raw_returns, daily_turnover,
                                           slippage_bps)

    assert_series_equal(adjusted, expected)
def slippage_sweep(self, returns, transactions, positions,
                   slippage_params=(100, 200, 300, 400, 500)):
    """
    Equity curves at different per-dollar slippage assumptions.

    Note: 1 bps = 0.0001. For A-shares one point is 0.01, i.e. 100 bps.

    Parameters
    ----------
    returns : pd.Series
        Timeseries of portfolio returns to be adjusted for various
        degrees of slippage.
    transactions : pd.DataFrame
        Prices and amounts of executed trades. One row per trade.
         - See full explanation in tears.create_full_tear_sheet.
    positions : pd.DataFrame
        Daily net position values.
         - See full explanation in tears.create_full_tear_sheet.
    slippage_params : tuple
        Slippage parameters to apply to the return time series (in
        basis points).

    Returns
    -------
    pd.DataFrame
        One column per slippage level, labelled ``"<bps> bps"``, holding
        ``empyrical.cum_returns`` of the adjusted returns with a
        starting value of 1.
    """
    daily_turnover = txn.get_turnover(
        positions, transactions, period=None, average=False
    )

    curves = pd.DataFrame()
    for bps in slippage_params:
        slipped = txn.adjust_returns_for_slippage(
            returns, daily_turnover, bps
        )
        column = "{} bps".format(bps)
        curves[column] = empyrical.cum_returns(slipped, 1)

    return curves
def test_adjust_returns_for_slippage(self):
    """
    Adjusting a constant 0.05 return series for 10 bps of slippage,
    with turnover taken from ``get_turnover(..., average=False)``,
    should yield a constant 0.049 series.
    """
    dates = date_range(start='2015-01-01', freq='D', periods=20)
    day_count = len(dates)

    def constant_series(value):
        # One identical value per test date.
        return Series([value] * day_count, index=dates)

    positions = DataFrame([[0.0, 10.0]] * day_count,
                          columns=[0, 'cash'], index=dates)

    # 100% total, 50% average daily turnover
    transactions = DataFrame([[10.0, 0]] * day_count,
                             columns=['txn_volume', 'txn_shares'],
                             index=dates)

    returns = constant_series(0.05)

    # 10 bps is 0.1% (0.001 as a fraction), hence 0.05 - 0.001 = 0.049.
    slippage_bps = 10
    expected = constant_series(0.049)

    turnover = get_turnover(transactions, positions, average=False)
    result = adjust_returns_for_slippage(returns, turnover, slippage_bps)

    assert_series_equal(result, expected)
def _align_and_warn(returns, positions, factor_returns, factor_loadings,
                    transactions=None, pos_in_dollars=True):
    """
    Make sure that all inputs have matching dates and tickers,
    and raise warnings if necessary.

    Parameters
    ----------
    returns : pd.Series
        Portfolio returns, indexed by date.
    positions : pd.DataFrame
        Holdings indexed by date, one column per asset plus a 'cash'
        column ('cash' is expected to be absent from factor_loadings).
    factor_returns : pd.DataFrame
        Factor returns, indexed by date.
    factor_loadings : pd.DataFrame
        Factor loadings; index level 0 holds dates and level 1 holds
        tickers.
    transactions : pd.DataFrame, optional
        Passed to ``get_turnover`` for the high-turnover warning. The
        check is skipped when this is None.
    pos_in_dollars : bool
        The turnover check only runs when this is True.

    Returns
    -------
    tuple of (returns, positions, factor_returns, factor_loadings)
        ``positions`` has the assets without factor loadings removed;
        ``returns``, ``positions`` and ``factor_returns`` have the dates
        without factor loadings removed. ``factor_loadings`` is returned
        as passed in.

    Raises
    ------
    ValueError
        If none of the non-cash position columns has factor loadings.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique())

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Check coverage before computing any ratio: when positions holds only
    # the cash column, num_stocks is 0 and dividing by it would raise
    # ZeroDivisionError instead of this explanatory error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:
            # Too many to list: show the first five and the last one.
            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"
        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            positions[missing_stocks[:5].union(missing_stocks[[-1]])].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique())

    if len(missing_factor_loadings_index) > 0:
        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(missing_factor_loadings_index[0],
                     missing_factor_loadings_index[-1])
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index,
                               errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    return (returns, positions, factor_returns, factor_loadings)
def perf_attrib(returns, positions, factor_returns, factor_loadings,
                transactions=None, pos_in_dollars=True):
    """
    Does performance attribution given risk info.

    Parameters
    ----------
    returns : pd.Series
        Returns for each day in the date range.
        - Example:
            2017-01-01   -0.017098
            2017-01-02    0.002683
            2017-01-03   -0.008669

    positions: pd.DataFrame
        Daily holdings (in dollars or percentages), indexed by date.
        Will be converted to percentages if positions are in dollars.
        Short positions show up as cash in the 'cash' column.
        - Examples:
                        AAPL  TLT  XOM  cash
            2017-01-01    34   58   10     0
            2017-01-02    22   77   18     0
            2017-01-03   -15   27   30    15

                            AAPL       TLT       XOM  cash
            2017-01-01  0.333333  0.568627  0.098039   0.0
            2017-01-02  0.188034  0.658120  0.153846   0.0
            2017-01-03  0.208333  0.375000  0.416667   0.0

    factor_returns : pd.DataFrame
        Returns by factor, with date as index and factors as columns
        - Example:
                        momentum  reversal
            2017-01-01  0.002779 -0.005453
            2017-01-02  0.001096  0.010290

    factor_loadings : pd.DataFrame
        Factor loadings for all days in the date range, with date and ticker
        as index, and factors as columns.
        - Example:
                               momentum  reversal
            dt         ticker
            2017-01-01 AAPL   -1.592914  0.852830
                       TLT     0.184864  0.895534
                       XOM     0.993160  1.149353
            2017-01-02 AAPL   -0.140009 -0.524952
                       TLT    -1.066978  0.185435
                       XOM    -1.798401  0.761549

    transactions : pd.DataFrame, optional
        Executed trade volumes and fill prices. Used to check the turnover of
        the algorithm. Default is None, in which case the turnover check is
        skipped.

        - One row per trade.
        - Trades on different names that occur at the same time will have
          identical indices.
        - Example:
            index                  amount   price    symbol
            2004-01-09 12:18:01    483      324.12   'AAPL'
            2004-01-09 12:18:01    122      83.10    'MSFT'
            2004-01-13 14:12:23    -75      340.43   'AAPL'

    pos_in_dollars : bool
        Flag indicating whether `positions` are in dollars or percentages
        If True, positions are in dollars.

    Returns
    -------
    tuple of (risk_exposures_portfolio, perf_attribution)

    risk_exposures_portfolio : pd.DataFrame
        df indexed by datetime, with factors as columns
        - Example:
                        momentum  reversal
            dt
            2017-01-01 -0.238655  0.077123
            2017-01-02  0.821872  1.520515

    perf_attribution : pd.DataFrame
        df with factors, total returns, common returns, and specific
        returns as columns, and datetimes as index
        - Example:
                        momentum  reversal  common_returns  specific_returns
            dt
            2017-01-01  0.249087  0.935925        1.185012          1.185012
            2017-01-02 -0.003194 -0.400786       -0.403980         -0.403980

    Raises
    ------
    ValueError
        If none of the non-cash position columns has factor loadings.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique())

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Check coverage before computing any ratio: when positions holds only
    # the cash column, num_stocks is 0 and dividing by it would raise
    # ZeroDivisionError instead of this explanatory error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:
            # Too many to list: show the first five and the last one.
            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"
        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            positions[missing_stocks[:5].union(missing_stocks[[-1]])].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique())

    if len(missing_factor_loadings_index) > 0:
        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(missing_factor_loadings_index[0],
                     missing_factor_loadings_index[-1])
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index,
                               errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    # Note that we convert positions to percentages *after* the checks
    # above, since get_turnover() expects positions in dollars.
    if pos_in_dollars:
        # convert holdings to percentages
        positions = get_percent_alloc(positions)

    # remove cash after normalizing positions
    positions = positions.drop('cash', axis='columns')

    # convert positions to long format
    positions = positions.stack()
    positions.index = positions.index.set_names(['dt', 'ticker'])

    # Weight each asset's loadings by its allocation, then sum per date.
    risk_exposures = factor_loadings.multiply(positions, axis='rows')
    risk_exposures_portfolio = risk_exposures.groupby(level='dt').sum()

    perf_attrib_by_factor = risk_exposures_portfolio.multiply(factor_returns)
    common_returns = perf_attrib_by_factor.sum(axis='columns')
    specific_returns = returns - common_returns

    returns_df = pd.DataFrame({
        'total_returns': returns,
        'common_returns': common_returns,
        'specific_returns': specific_returns
    })

    return (risk_exposures_portfolio,
            pd.concat([perf_attrib_by_factor, returns_df], axis='columns'))
def _align_and_warn(returns, positions, factor_returns, factor_loadings,
                    transactions=None, pos_in_dollars=True):
    """
    Make sure that all inputs have matching dates and tickers,
    and raise warnings if necessary.

    Parameters
    ----------
    returns : pd.Series
        Portfolio returns, indexed by date.
    positions : pd.DataFrame
        Holdings indexed by date, one column per asset plus a 'cash'
        column ('cash' is expected to be absent from factor_loadings).
    factor_returns : pd.DataFrame
        Factor returns, indexed by date.
    factor_loadings : pd.DataFrame
        Factor loadings; index level 0 holds dates and level 1 holds
        tickers.
    transactions : pd.DataFrame, optional
        Passed to ``get_turnover`` for the high-turnover warning. The
        check is skipped when this is None.
    pos_in_dollars : bool
        The turnover check only runs when this is True.

    Returns
    -------
    tuple of (returns, positions, factor_returns, factor_loadings)
        ``positions`` has the assets without factor loadings removed;
        ``returns``, ``positions`` and ``factor_returns`` have the dates
        without factor loadings removed. ``factor_loadings`` is returned
        as passed in.

    Raises
    ------
    ValueError
        If none of the non-cash position columns has factor loadings.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique()
    )

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Check coverage before computing any ratio: when positions holds only
    # the cash column, num_stocks is 0 and dividing by it would raise
    # ZeroDivisionError instead of this explanatory error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:
            # Too many to list: show the first five and the last one.
            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"
        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            positions[missing_stocks[:5].union(missing_stocks[[-1]])].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique()
    )

    if len(missing_factor_loadings_index) > 0:
        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(
                missing_factor_loadings_index[0],
                missing_factor_loadings_index[-1]
            )
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index,
                               errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    return (returns, positions, factor_returns, factor_loadings)
def perf_attrib(returns, positions, factor_returns, factor_loadings,
                transactions=None, pos_in_dollars=True):
    """
    Does performance attribution given risk info.

    Parameters
    ----------
    returns : pd.Series
        Returns for each day in the date range.
        - Example:
            2017-01-01   -0.017098
            2017-01-02    0.002683
            2017-01-03   -0.008669

    positions: pd.DataFrame
        Daily holdings (in dollars or percentages), indexed by date.
        Will be converted to percentages if positions are in dollars.
        Short positions show up as cash in the 'cash' column.
        - Examples:
                        AAPL  TLT  XOM  cash
            2017-01-01    34   58   10     0
            2017-01-02    22   77   18     0
            2017-01-03   -15   27   30    15

                            AAPL       TLT       XOM  cash
            2017-01-01  0.333333  0.568627  0.098039   0.0
            2017-01-02  0.188034  0.658120  0.153846   0.0
            2017-01-03  0.208333  0.375000  0.416667   0.0

    factor_returns : pd.DataFrame
        Returns by factor, with date as index and factors as columns
        - Example:
                        momentum  reversal
            2017-01-01  0.002779 -0.005453
            2017-01-02  0.001096  0.010290

    factor_loadings : pd.DataFrame
        Factor loadings for all days in the date range, with date and ticker
        as index, and factors as columns.
        - Example:
                               momentum  reversal
            dt         ticker
            2017-01-01 AAPL   -1.592914  0.852830
                       TLT     0.184864  0.895534
                       XOM     0.993160  1.149353
            2017-01-02 AAPL   -0.140009 -0.524952
                       TLT    -1.066978  0.185435
                       XOM    -1.798401  0.761549

    transactions : pd.DataFrame, optional
        Executed trade volumes and fill prices. Used to check the turnover of
        the algorithm. Default is None, in which case the turnover check is
        skipped.

        - One row per trade.
        - Trades on different names that occur at the same time will have
          identical indices.
        - Example:
            index                  amount   price    symbol
            2004-01-09 12:18:01    483      324.12   'AAPL'
            2004-01-09 12:18:01    122      83.10    'MSFT'
            2004-01-13 14:12:23    -75      340.43   'AAPL'

    pos_in_dollars : bool
        Flag indicating whether `positions` are in dollars or percentages
        If True, positions are in dollars.

    Returns
    -------
    tuple of (risk_exposures_portfolio, perf_attribution)

    risk_exposures_portfolio : pd.DataFrame
        df indexed by datetime, with factors as columns
        - Example:
                        momentum  reversal
            dt
            2017-01-01 -0.238655  0.077123
            2017-01-02  0.821872  1.520515

    perf_attribution : pd.DataFrame
        df with factors, total returns, common returns, and specific
        returns as columns, and datetimes as index
        - Example:
                        momentum  reversal  common_returns  specific_returns
            dt
            2017-01-01  0.249087  0.935925        1.185012          1.185012
            2017-01-02 -0.003194 -0.400786       -0.403980         -0.403980

    Raises
    ------
    ValueError
        If none of the non-cash position columns has factor loadings.
    """
    missing_stocks = positions.columns.difference(
        factor_loadings.index.get_level_values(1).unique()
    )

    # cash will not be in factor_loadings
    num_stocks = len(positions.columns) - 1
    missing_stocks = missing_stocks.drop('cash')
    num_stocks_covered = num_stocks - len(missing_stocks)

    # Check coverage before computing any ratio: when positions holds only
    # the cash column, num_stocks is 0 and dividing by it would raise
    # ZeroDivisionError instead of this explanatory error.
    if num_stocks_covered == 0:
        raise ValueError("Could not perform performance attribution. "
                         "No factor loadings were available for this "
                         "algorithm's positions.")

    if len(missing_stocks) > 0:
        missing_ratio = round(len(missing_stocks) / num_stocks, ndigits=3)

        if len(missing_stocks) > 5:
            # Too many to list: show the first five and the last one.
            missing_stocks_displayed = (
                " {} assets were missing factor loadings, including: {}..{}"
            ).format(len(missing_stocks),
                     ', '.join(missing_stocks[:5].map(str)),
                     missing_stocks[-1])
            avg_allocation_msg = "selected missing assets"
        else:
            missing_stocks_displayed = (
                "The following assets were missing factor loadings: {}."
            ).format(list(missing_stocks))
            avg_allocation_msg = "missing assets"

        missing_stocks_warning_msg = (
            "Could not determine risk exposures for some of this algorithm's "
            "positions. Returns from the missing assets will not be properly "
            "accounted for in performance attribution.\n"
            "\n"
            "{}. "
            "Ignoring for exposure calculation and performance attribution. "
            "Ratio of assets missing: {}. Average allocation of {}:\n"
            "\n"
            "{}.\n"
        ).format(
            missing_stocks_displayed,
            missing_ratio,
            avg_allocation_msg,
            positions[missing_stocks[:5].union(missing_stocks[[-1]])].mean(),
        )

        warnings.warn(missing_stocks_warning_msg)

        positions = positions.drop(missing_stocks, axis='columns',
                                   errors='ignore')

    missing_factor_loadings_index = positions.index.difference(
        factor_loadings.index.get_level_values(0).unique()
    )

    if len(missing_factor_loadings_index) > 0:
        if len(missing_factor_loadings_index) > 5:
            missing_dates_displayed = (
                "(first missing is {}, last missing is {})"
            ).format(
                missing_factor_loadings_index[0],
                missing_factor_loadings_index[-1]
            )
        else:
            missing_dates_displayed = list(missing_factor_loadings_index)

        warning_msg = (
            "Could not find factor loadings for {} dates: {}. "
            "Truncating date range for performance attribution. "
        ).format(len(missing_factor_loadings_index), missing_dates_displayed)

        warnings.warn(warning_msg)

        positions = positions.drop(missing_factor_loadings_index,
                                   errors='ignore')
        returns = returns.drop(missing_factor_loadings_index,
                               errors='ignore')
        factor_returns = factor_returns.drop(missing_factor_loadings_index,
                                             errors='ignore')

    if transactions is not None and pos_in_dollars:
        turnover = get_turnover(positions, transactions).mean()
        if turnover > PERF_ATTRIB_TURNOVER_THRESHOLD:
            warning_msg = (
                "This algorithm has relatively high turnover of its "
                "positions. As a result, performance attribution might not be "
                "fully accurate.\n"
                "\n"
                "Performance attribution is calculated based "
                "on end-of-day holdings and does not account for intraday "
                "activity. Algorithms that derive a high percentage of "
                "returns from buying and selling within the same day may "
                "receive inaccurate performance attribution.\n"
            )
            warnings.warn(warning_msg)

    # Note that we convert positions to percentages *after* the checks
    # above, since get_turnover() expects positions in dollars.
    if pos_in_dollars:
        # convert holdings to percentages
        positions = get_percent_alloc(positions)

    # remove cash after normalizing positions
    positions = positions.drop('cash', axis='columns')

    # convert positions to long format
    positions = positions.stack()
    positions.index = positions.index.set_names(['dt', 'ticker'])

    # Weight each asset's loadings by its allocation, then sum per date.
    risk_exposures = factor_loadings.multiply(positions, axis='rows')
    risk_exposures_portfolio = risk_exposures.groupby(level='dt').sum()

    perf_attrib_by_factor = risk_exposures_portfolio.multiply(factor_returns)
    common_returns = perf_attrib_by_factor.sum(axis='columns')
    specific_returns = returns - common_returns

    returns_df = pd.DataFrame({'total_returns': returns,
                               'common_returns': common_returns,
                               'specific_returns': specific_returns})

    return (risk_exposures_portfolio,
            pd.concat([perf_attrib_by_factor, returns_df], axis='columns'))