def assign_strategy(filtered_stocks: pd.DataFrame, algo: str, n_stocks: int) -> tuple:
    """Sample ``n_stocks`` columns and bundle the inputs for one strategy.

    Args:
        filtered_stocks: Price data with one column per stock; ``n_stocks``
            columns are drawn at random with ``sample(..., axis=1)``.
        algo: One of ``"hrp"``, ``"ef-sharpe"``, ``"ef-risk"`` or
            ``"ef-minvol"``.
        n_stocks: Number of columns to sample.

    Returns:
        A 5-tuple ``(strategy, title, kwargs, mu, s)``: the strategy callable,
        its display title, the keyword arguments prepared for it, the result
        of ``mean_historical_return`` and the Ledoit-Wolf shrunk covariance.

    Raises:
        ValueError: If ``algo`` is not one of the supported names.
    """
    supported = ("hrp", "ef-sharpe", "ef-risk", "ef-minvol")
    # Validate up front: a bare ``assert False`` disappears under ``python -O``
    # (the function would then return None) and only fired after the sampling
    # and covariance work had already been done.
    if algo not in supported:
        raise ValueError(
            f"unknown algo {algo!r}; expected one of {', '.join(supported)}"
        )

    filtered_stocks = filtered_stocks.sample(n=n_stocks, axis=1)
    returns = returns_from_prices(filtered_stocks, log_returns=False)
    mu = mean_historical_return(filtered_stocks)
    assert len(mu) == n_stocks  # internal sanity check: one estimate per sampled stock
    s = CovarianceShrinkage(filtered_stocks).ledoit_wolf()

    if algo == "hrp":
        # HRP works from the raw return series rather than (mu, cov).
        return (hrp_strategy, "Hierarchical Risk Parity", {'returns': returns}, mu, s)
    if algo == "ef-sharpe":
        return (
            ef_sharpe_strategy,
            "Efficient Frontier - max. sharpe",
            {'returns': mu, 'cov_matrix': s},
            mu,
            s,
        )
    if algo == "ef-risk":
        return (
            ef_risk_strategy,
            "Efficient Frontier - efficient risk",
            {'returns': mu, 'cov_matrix': s, 'target_volatility': 5.0},
            mu,
            s,
        )
    # Only "ef-minvol" remains after the validation above.
    return (
        ef_minvol_strategy,
        "Efficient Frontier - minimum volatility",
        {'returns': mu, 'cov_matrix': s},
        mu,
        s,
    )
def generate_testing_set(self, stock_code):
    """Build the news testing set for ``stock_code`` and write it to parquet.

    News rows from ``self.get_yahoo_finance()`` between ``self.cut_off_date``
    and ``self.end_date`` are restricted to those tagged with ``stock_code``,
    their titles are grouped per (``DT``, ``PRE_DT``) pair, and each group is
    joined with the stock's return on ``PRE_DT``. A ``<stock_code>_FLAG``
    column is 0 when that return is negative and 1 otherwise (a missing
    return is not ``< 0`` and therefore also yields 1).
    """
    news = self.get_yahoo_finance()
    news = news.loc[self.cut_off_date:self.end_date].reset_index()
    news['PRE_DT'] = news['DT'].swifter.apply(find_nearest_trading_date)
    news['DT'] = news['DT'].apply(lambda x: x.date())

    # Keep tagged rows only, one row per tag, then narrow to this stock.
    news = news[news.TAG.notna()]
    news['TAG'] = news['TAG'].apply(lambda x: x.split(','))
    news = news.explode('TAG')
    news = news[news.TAG == stock_code][['DT', 'TITLE', 'PRE_DT']]
    news = news.groupby(['DT', 'PRE_DT'])['TITLE'].apply(list).reset_index()

    first_day = adj_days_to_str_date(config.trading_date_list[0], -1)
    last_day = config.trading_date_list[-1]
    prices = yf.download(stock_code, first_day, last_day)
    daily_return = returns_from_prices(prices['Adj Close']).reset_index()
    # PRE_DT is matched against the string form of the return's date.
    daily_return['Date'] = daily_return['Date'].astype(str)

    merged = pd.merge(news, daily_return, left_on='PRE_DT', right_on='Date', how='left')
    merged = merged.drop(columns=['Date'])
    merged = merged.rename(columns={'Adj Close': stock_code})
    merged[stock_code + '_FLAG'] = merged[stock_code].apply(lambda x: 0 if x < 0 else 1)

    print('Generating ' + stock_code)
    out_path = 'out/testing_set/testing_news_' + str(self.window_size) + 'd_' + stock_code + '.parquet'
    merged.to_parquet(out_path)
def test_cdar_example():
    """Check ``min_cdar`` weights, reported performance and a historical CDaR estimate."""
    confidence = 0.95
    prices = get_data()
    exp_rets = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices).dropna()

    cd = EfficientCDaR(exp_rets, rets, beta=confidence)
    weights = cd.min_cdar()

    # Weights: a dict keyed by ticker, summing to one, non-negative up to tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cd.tickers)
    np.testing.assert_almost_equal(cd.weights.sum(), 1)
    assert all(wt >= -1e-5 for wt in weights.values())

    expected_performance = (0.21502, 0.056433)
    np.testing.assert_allclose(
        cd.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute CDaR from the drawdowns of the cumulative portfolio returns.
    reported_cdar = cd.portfolio_performance()[1]
    cumulative = (rets @ cd.weights).cumsum(0)
    drawdown = cumulative.cummax() - cumulative
    threshold = drawdown.quantile(confidence)
    historical_cdar = drawdown[drawdown > threshold].mean()
    np.testing.assert_almost_equal(historical_cdar, reported_cdar, decimal=3)
def test_log_returns_from_prices():
    """Log returns keep the NaN count of the prices and match a reference value."""
    prices = get_data()
    nan_before = prices.isnull().sum(axis=1).sum()

    log_returns = expected_returns.returns_from_prices(prices, log_returns=True)
    nan_after = log_returns.isnull().sum(axis=1).sum()

    assert nan_after == nan_before
    last_value = log_returns.iloc[-1, -1]
    np.testing.assert_almost_equal(last_value, 0.0001682740081102576)
def test_custom_tracking_error():
    """Tracking-error weights agree between BaseConvexOptimizer and EfficientFrontier."""
    all_rets = expected_returns.returns_from_prices(get_data()).dropna()
    # AAPL serves as the benchmark and is removed from the investable assets.
    benchmark_rets = all_rets["AAPL"]
    asset_rets = all_rets.drop("AAPL", axis=1)
    cov = risk_models.sample_cov(asset_rets, returns_data=True)

    objective_kwargs = {
        "historic_returns": asset_rets,
        "benchmark_returns": benchmark_rets,
    }

    base_opt = BaseConvexOptimizer(
        n_assets=len(asset_rets.columns),
        tickers=list(asset_rets.columns),
        weight_bounds=(0, 1),
    )
    base_opt.convex_objective(
        objective_functions.ex_post_tracking_error, **objective_kwargs
    )
    base_weights = base_opt.clean_weights()

    frontier = EfficientFrontier(None, cov)
    frontier.convex_objective(
        objective_functions.ex_post_tracking_error, **objective_kwargs
    )
    frontier_weights = frontier.clean_weights()

    assert base_weights == frontier_weights
def test_james_stein():
    """James-Stein estimates match reference values and lie between mean and grand mean."""
    prices = get_data()
    shrunk = expected_returns.james_stein_shrinkage(prices)
    reference = np.array(
        [
            0.25870218,
            0.32318595,
            0.29184719,
            0.23082673,
            0.41448111,
            0.19238474,
            0.23175124,
            0.17825652,
            0.20656697,
            0.13374178,
            0.16512141,
            0.25217574,
            0.12991287,
            0.18700597,
            0.21668984,
            0.3147078,
            0.33948993,
            0.24437593,
            0.225335,
            0.27014272,
        ]
    )
    np.testing.assert_array_almost_equal(shrunk.values, reference)

    # Shrinkage check: every estimate sits between the asset's own annualised
    # sample mean and the mean of those sample means.
    sample_mean = expected_returns.returns_from_prices(prices).mean(axis=0) * 252
    grand_mean = sample_mean.mean()
    shrunk_down = (shrunk <= grand_mean) & (shrunk >= sample_mean)
    shrunk_up = (shrunk >= grand_mean) & (shrunk <= sample_mean)
    assert (shrunk_down | shrunk_up).all()
def hrp(my_portfolio, perf=True) -> list:
    """Compute Hierarchical Risk Parity weights for ``my_portfolio``.

    Prices for ``my_portfolio.portfolio`` are downloaded between
    ``my_portfolio.start_date`` and ``my_portfolio.end_date``, converted to
    returns and optimised with ``HRPOpt``. When ``perf`` is ``True``,
    ``portfolio_performance(verbose=True)`` is also called.

    Returns:
        The cleaned weights, wrapped one per list and passed through ``flatten``.
    """
    # Limit the download to the requested window instead of the whole history.
    ohlc = yf.download(
        my_portfolio.portfolio,
        start=my_portfolio.start_date,
        end=my_portfolio.end_date,
        progress=False,
    )
    adj_close = ohlc["Adj Close"].dropna(how="all")
    prices = adj_close.filter(my_portfolio.portfolio)
    # A company may not be public for part of the window; a price of 0 cannot
    # be used, so missing prices are filled with 1.
    prices = prices.fillna(1)

    optimizer = HRPOpt(expected_returns.returns_from_prices(prices))
    optimizer.optimize()
    cleaned = optimizer.clean_weights()

    # One single-element list per weight, in the order of the cleaned weights.
    nested = [[weight] for weight in cleaned.values()]

    if perf is True:
        optimizer.portfolio_performance(verbose=True)
    return flatten(nested)
def test_risk_matrix_and_returns_data():
    """Each risk model yields the same valid matrix from prices and from returns."""
    # Exercises the switcher method with simple calls.
    prices = get_data()
    methods = {
        "sample_cov",
        "semicovariance",
        "exp_cov",
        # FIXME: "min_cov_determinant" is left out because it fails.
        "ledoit_wolf",
        "ledoit_wolf_constant_variance",
        "ledoit_wolf_single_factor",
        "ledoit_wolf_constant_correlation",
        "oracle_approximating",
    }
    for method in methods:
        from_prices = risk_models.risk_matrix(prices, method=method)
        assert from_prices.shape == (20, 20)
        assert from_prices.notnull().all().all()
        assert risk_models._is_positive_semidefinite(from_prices)

        from_returns = risk_models.risk_matrix(
            expected_returns.returns_from_prices(prices),
            returns_data=True,
            method=method,
        )
        pd.testing.assert_frame_equal(from_prices, from_returns)
def setup_optimisation_matrices(stocks, timeframe: Timeframe, exclude_price, warning_cb):
    """Load prices for ``stocks`` and derive the matrices used for optimisation.

    Args:
        stocks: Stocks to load; passed to ``company_prices``.
        timeframe: Period to load prices for; passed to ``company_prices``.
        exclude_price: Passed to ``remove_bad_stocks``.
        warning_cb: Passed to ``remove_bad_stocks``.

    Returns:
        ``(all_returns, stock_prices, latest_prices, first_prices)`` where
        ``all_returns`` holds simple returns with missing values set to 0.0
        and ``latest_prices`` / ``first_prices`` are the rows of
        ``stock_prices`` at its last / first index entry.

    Raises:
        AssertionError: If the matrices are inconsistent with each other, no
            stock is left, or there are 7 or fewer rows of prices.
    """
    # ref: https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html#processing-historical-prices
    stock_prices = company_prices(stocks, timeframe, fields="last_price", missing_cb=None)
    # ``fillna(method="bfill")`` is deprecated in pandas; ``bfill`` is the
    # direct equivalent and back-fills gaps of at most 10 consecutive rows.
    stock_prices = stock_prices.bfill(limit=10, axis=0)
    latest_date = stock_prices.index[-1]
    earliest_date = stock_prices.index[0]

    # Filter against both ends of the period.
    stock_prices = remove_bad_stocks(stock_prices, earliest_date, exclude_price, warning_cb)
    stock_prices = remove_bad_stocks(stock_prices, latest_date, exclude_price, warning_cb)

    latest_prices = stock_prices.loc[latest_date]
    first_prices = stock_prices.loc[earliest_date]
    all_returns = returns_from_prices(stock_prices, log_returns=False).fillna(value=0.0)

    # check that the matrices are consistent to each other
    assert stock_prices.shape[1] == latest_prices.shape[0]
    assert stock_prices.shape[1] == all_returns.shape[1]
    assert all_returns.shape[0] == stock_prices.shape[0] - 1
    assert len(stock_prices.columns) > 0  # must have at least 1 stock
    assert len(stock_prices) > 7  # and at least one trading week of data

    return all_returns, stock_prices, latest_prices, first_prices
def test_cvar_example():
    """Check ``min_cvar`` weights, reported performance and a historical CVaR estimate."""
    confidence = 0.95
    prices = get_data()
    exp_rets = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices).dropna()

    cv = EfficientCVaR(exp_rets, rets, beta=confidence)
    weights = cv.min_cvar()

    # Weights: a dict keyed by ticker, summing to one, non-negative up to tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cv.tickers)
    np.testing.assert_almost_equal(cv.weights.sum(), 1)
    assert all(wt >= -1e-5 for wt in weights.values())

    expected_performance = (0.17745746040573562, 0.017049502122532853)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute CVaR as the negated mean of returns below the (1 - beta) quantile.
    reported_cvar = cv.portfolio_performance()[1]
    portfolio_rets = rets @ cv.weights
    value_at_risk = portfolio_rets.quantile(1 - confidence)
    historical_cvar = -portfolio_rets[portfolio_rets < value_at_risk].mean()
    np.testing.assert_almost_equal(historical_cvar, reported_cvar, decimal=3)
def test_returns_dataframe():
    """``returns_from_prices`` yields a date-indexed 7125 x 20 frame with no return above 1."""
    df = get_data()
    returns_df = expected_returns.returns_from_prices(df)

    assert isinstance(returns_df, pd.DataFrame)
    assert returns_df.shape[1] == 20
    assert len(returns_df) == 7125
    # ``Index.is_all_dates`` was removed in pandas 2.0; checking the index
    # dtype keeps the "index holds dates" assertion working on current pandas.
    assert pd.api.types.is_datetime64_any_dtype(returns_df.index)
    assert not ((returns_df > 1) & returns_df.notnull()).any().any()
def setup_efficient_semivariance(data_only=False, solver=None, verbose=False):
    """Build an ``EfficientSemivariance`` fixture from the NaN-free test data.

    Returns:
        ``(mean_return, historic_returns)`` when ``data_only`` is truthy,
        otherwise an ``EfficientSemivariance`` built from those inputs with
        the given ``solver`` and ``verbose`` settings.
    """
    prices = get_data().dropna(axis=0, how="any")
    mean_return = expected_returns.mean_historical_return(prices, compounding=False)
    historic_returns = returns_from_prices(prices)

    if not data_only:
        return EfficientSemivariance(
            mean_return, historic_returns, solver=solver, verbose=verbose
        )
    return mean_return, historic_returns
def hrp_portfolio(prices):
    """Compute Hierarchical Risk Parity weights from a price history.

    Arguments:
        prices (pd.DataFrame) – adjusted (daily) closing prices of the assets;
            each row is a date and each column is a ticker/id.

    Returns:
        Whatever ``HRPOpt.hrp_portfolio()`` returns for the derived returns.
    """
    asset_returns = expected_returns.returns_from_prices(prices)
    optimizer = HRPOpt(asset_returns)
    return optimizer.hrp_portfolio()
def test_log_returns_from_prices():
    """Log returns keep the NaN count, match a reference value and the deprecated alias."""
    prices = get_data()
    nan_before = prices.isnull().sum(axis=1).sum()

    log_returns = expected_returns.returns_from_prices(prices, log_returns=True)
    nan_after = log_returns.isnull().sum(axis=1).sum()

    assert nan_after == nan_before
    last_value = log_returns.iloc[-1, -1]
    np.testing.assert_almost_equal(last_value, 0.0001682740081102576)

    # The deprecated function stays covered until it is removed.
    legacy_log_returns = expected_returns.log_returns_from_prices(prices)
    np.testing.assert_allclose(legacy_log_returns, log_returns)
def get_movement():
    """Return log returns of the adjusted closes with ``Date`` as a string column.

    ``convert_price2movement`` is then called once per entry of
    ``config.stock_list``; its return value is ignored, so it presumably
    updates the frame in place (confirm against its definition).
    """
    adj_close = get_stock_price_data()['Adj Close']
    log_returns = returns_from_prices(adj_close, log_returns=True)

    movement = log_returns.reset_index()
    movement = movement.rename(columns={0: 'FLAG'})
    movement['Date'] = movement['Date'].astype(str)

    for ticker in config.stock_list:
        convert_price2movement(movement, ticker)
    return movement
def test_es_example_short():
    """Shifting GOOG returns down by their 75% quantile must not shrink |GOOG weight|."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices).dropna()

    baseline = EfficientSemivariance(mu, rets, weight_bounds=(-1, 1))
    weight_before = baseline.efficient_return(0.2, market_neutral=True)["GOOG"]

    # Subtract the 75% quantile from every GOOG return, then re-optimise.
    upper_quartile = rets["GOOG"].quantile(0.75)
    rets["GOOG"] -= upper_quartile

    shifted = EfficientSemivariance(mu, rets, weight_bounds=(-1, 1))
    weight_after = shifted.efficient_return(0.2, market_neutral=True)["GOOG"]

    assert abs(weight_after) >= abs(weight_before)
def test_es_example_weekly():
    """Efficient-return semivariance on weekly data matches reference performance."""
    periods_per_year = 52
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(
        weekly_prices, frequency=periods_per_year
    )
    weekly_rets = expected_returns.returns_from_prices(weekly_prices).dropna()

    es = EfficientSemivariance(mu, weekly_rets, frequency=periods_per_year)
    es.efficient_return(0.2)

    expected_performance = (
        0.2000000562544616,
        0.07667633475531543,
        2.3475307841574087,
    )
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def test_es_errors():
    """EfficientSemivariance warns on NaN returns and rejects mismatched columns."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices)

    # Returns that have not been through dropna trigger a UserWarning.
    with pytest.warns(UserWarning):
        EfficientSemivariance(mu, rets)

    clean_rets = rets.dropna(axis=0, how="any")
    assert EfficientSemivariance(mu, clean_rets)

    # Dropping the last column leaves fewer return columns than entries in mu.
    truncated_rets = clean_rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientSemivariance(mu, truncated_rets)
def test_es_example_monthly():
    """Efficient-return semivariance on monthly data matches reference performance."""
    periods_per_year = 12
    monthly_prices = get_data().resample("M").first()
    mu = expected_returns.mean_historical_return(
        monthly_prices, frequency=periods_per_year
    )
    monthly_rets = expected_returns.returns_from_prices(monthly_prices).dropna()

    es = EfficientSemivariance(mu, monthly_rets, frequency=periods_per_year)
    es.efficient_return(0.3)

    expected_performance = (0.3, 0.04746519522734184, 5.899059271933824)
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def assign_strategy(ld: LazyDictionary, algo: str) -> tuple:
    """Register model inputs on ``ld`` and pick the strategy for ``algo``.

    The shrunk covariance (``ld["s"]``) and the market-implied risk aversion
    (``ld["delta"]``) are stored as values. The Black-Litterman model, its
    posterior returns/covariance, the mean historical return and the return
    series are stored as callables taking ``ld`` (presumably evaluated by
    ``LazyDictionary`` on first access — confirm against its definition).
    ``ld["title"]`` is set to the display title of the chosen strategy.

    Args:
        ld: Must already provide ``"filtered_stocks"``, ``"market_prices"``
            and ``"market_caps"``, plus ``"returns_by"`` for non-HRP strategies.
        algo: One of ``"hrp"``, ``"ef-sharpe"``, ``"ef-risk"`` or
            ``"ef-minvol"``.

    Returns:
        ``(strategy, kwargs)``: the strategy callable and its keyword arguments.

    Raises:
        ValueError: If ``algo`` is not one of the supported names.
    """
    assert ld is not None
    titles = {
        "hrp": "Hierarchical Risk Parity",
        "ef-sharpe": "Efficient Frontier - max. sharpe",
        "ef-risk": "Efficient Frontier - efficient risk",
        "ef-minvol": "Efficient Frontier - minimum volatility",
    }
    # Fail before touching ``ld``: the former trailing ``assert False`` vanished
    # under ``python -O`` and only fired after ``ld`` had been populated.
    if algo not in titles:
        raise ValueError(
            f"unknown algo {algo!r}; expected one of {', '.join(titles)}"
        )

    # use of black-litterman is based on https://github.com/robertmartin8/PyPortfolioOpt/blob/master/cookbook/4-Black-Litterman-Allocation.ipynb
    ld["s"] = CovarianceShrinkage(ld["filtered_stocks"]).ledoit_wolf()
    ld["delta"] = market_implied_risk_aversion(ld["market_prices"])

    # Black-Litterman posterior: hopefully a better estimate of returns than
    # extrapolating historical prices.
    ld["bl"] = lambda ld: BlackLittermanModel(
        ld["s"],
        pi="market",
        market_caps=ld["market_caps"],
        risk_aversion=abs(ld["delta"]),
        absolute_views={},
    )
    ld["posterior_total_returns"] = lambda ld: ld["bl"].bl_returns()
    ld["posterior_s"] = lambda ld: ld["bl"].bl_cov()
    ld["mu"] = lambda ld: mean_historical_return(ld["filtered_stocks"])
    ld["returns_from_prices"] = lambda ld: returns_from_prices(ld["filtered_stocks"])

    if algo == "hrp":
        # HRP only needs the raw return series; no expected-returns estimate
        # is evaluated just to be thrown away.
        kwargs = {"returns": ld["returns_from_prices"]}
    else:
        use_bl = ld["returns_by"] != "by_prices"
        # Keep the two estimates consistent. Previously the returns choice was
        # inverted relative to the covariance choice, pairing historical
        # returns with the posterior covariance and vice versa.
        if use_bl:
            kwargs = {
                "returns": ld["posterior_total_returns"],
                "cov_matrix": ld["posterior_s"],
            }
        else:
            kwargs = {"returns": ld["mu"], "cov_matrix": ld["s"]}

    ld["title"] = titles[algo]
    if algo == "hrp":
        return (hrp_strategy, kwargs)
    if algo == "ef-sharpe":
        return (ef_sharpe_strategy, kwargs)
    if algo == "ef-risk":
        kwargs["target_volatility"] = 5.0
        return (ef_risk_strategy, kwargs)
    # Only "ef-minvol" remains after the validation above.
    return (ef_minvol_strategy, kwargs)
def test_risk_matrix_and_returns_data():
    """Each return model yields the same float series from prices and from returns."""
    # Exercises the switcher method with simple calls.
    prices = get_data()
    methods = {"mean_historical_return", "ema_historical_return", "capm_return"}
    for method in methods:
        from_prices = expected_returns.return_model(prices, method=method)
        assert isinstance(from_prices, pd.Series)
        assert list(from_prices.index) == list(prices.columns)
        assert from_prices.notnull().all()
        assert from_prices.dtype == "float64"

        from_returns = expected_returns.return_model(
            expected_returns.returns_from_prices(prices),
            method=method,
            returns_data=True,
        )
        pd.testing.assert_series_equal(from_prices, from_returns)
def setup_efficient_cvar(data_only=False, solver=None, verbose=False, solver_options=None):
    """Build an ``EfficientCVaR`` fixture from the NaN-free test data.

    Returns:
        ``(mean_return, historic_returns)`` when ``data_only`` is truthy,
        otherwise an ``EfficientCVaR`` built from those inputs with the given
        ``verbose``, ``solver`` and ``solver_options`` settings.
    """
    prices = get_data().dropna(axis=0, how="any")
    mean_return = expected_returns.mean_historical_return(prices)
    historic_returns = returns_from_prices(prices)

    if not data_only:
        return EfficientCVaR(
            mean_return,
            historic_returns,
            verbose=verbose,
            solver=solver,
            solver_options=solver_options,
        )
    return mean_return, historic_returns
def test_ex_post_tracking_error():
    """Tracking error is zero against the mean and grows as the benchmark is scaled."""
    prices = get_data()
    rets = returns_from_prices(prices).dropna()
    benchmark = rets.mean(axis=1)
    n_assets = len(prices.columns)
    equal_weights = np.ones((n_assets,)) / n_assets

    # Against the cross-sectional mean, equal weights should have zero TE.
    tracking_error = objective_functions.ex_post_tracking_error(
        equal_weights, rets, benchmark
    )
    np.testing.assert_almost_equal(tracking_error, 0)

    # Each larger multiple of the benchmark must strictly increase the TE.
    previous = tracking_error
    for multiplier in range(2, 20, 4):
        scaled_benchmark = benchmark * multiplier
        tracking_error = objective_functions.ex_post_tracking_error(
            equal_weights, rets, scaled_benchmark
        )
        assert tracking_error > previous
        previous = tracking_error
def test_es_example():
    """Check efficient-return semivariance weights and reference performance."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices).dropna()

    es = EfficientSemivariance(mu, rets)
    weights = es.efficient_return(0.2)

    # Weights: a dict keyed by ticker, summing to one, non-negative up to tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(es.tickers)
    np.testing.assert_almost_equal(es.weights.sum(), 1)
    assert all(wt >= -1e-5 for wt in weights.values())

    expected_performance = (0.20, 0.08558991313395496, 2.1030523036993265)
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def test_cvar_example_short():
    """Market-neutral CVaR weights sum to zero and match reference performance."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices).dropna()

    cv = EfficientCVaR(mu, rets, weight_bounds=(-1, 1))
    weights = cv.efficient_return(0.2, market_neutral=True)

    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cv.tickers)
    # Market neutral: the weights sum to zero.
    np.testing.assert_almost_equal(cv.weights.sum(), 0)

    expected_performance = (0.2, 0.013406209257292611)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def test_cvar_errors():
    """EfficientCVaR raises or warns on unsupported calls and invalid inputs."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices)

    # Returns that have not been through dropna trigger a UserWarning.
    with pytest.warns(UserWarning):
        EfficientCVaR(mu, rets)

    rets = rets.dropna(axis=0, how="any")
    assert EfficientCVaR(mu, rets)

    # These optimisers are expected to be unavailable on EfficientCVaR.
    cv = setup_efficient_cvar()
    with pytest.raises(NotImplementedError):
        cv.min_volatility()
    with pytest.raises(NotImplementedError):
        cv.max_sharpe()
    with pytest.raises(NotImplementedError):
        cv.max_quadratic_utility()

    with pytest.raises(ValueError):
        # Beta must be between 0 and 1
        cv = EfficientCVaR(mu, rets, 1)

    with pytest.warns(UserWarning):
        cv = EfficientCVaR(mu, rets, 0.1)

    with pytest.raises(OptimizationError):
        # Must be <= max expected return
        cv = EfficientCVaR(mu, rets)
        cv.efficient_return(target_return=np.abs(mu).max() + 0.01)

    with pytest.raises(AttributeError):
        # list not supported.
        EfficientCVaR(mu, rets.to_numpy().tolist())

    # Dropping the last column leaves fewer return columns than entries in mu.
    truncated_rets = rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientCVaR(mu, truncated_rets)
def test_cvar_example_weekly():
    """Weekly efficient-return CVaR matches reference values and a historical estimate."""
    confidence = 0.95
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(weekly_prices, frequency=52)
    weekly_rets = expected_returns.returns_from_prices(weekly_prices).dropna()

    cv = EfficientCVaR(mu, weekly_rets, beta=confidence)
    cv.efficient_return(0.2)

    expected_performance = (0.2, 0.03447723250708958)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute CVaR as the negated mean of returns below the (1 - beta) quantile.
    reported_cvar = cv.portfolio_performance()[1]
    portfolio_rets = weekly_rets @ cv.weights
    value_at_risk = portfolio_rets.quantile(1 - confidence)
    historical_cvar = -portfolio_rets[portfolio_rets < value_at_risk].mean()
    np.testing.assert_almost_equal(historical_cvar, reported_cvar, decimal=3)
def test_risk_matrix_additional_kwargs():
    """Extra keyword arguments given to ``risk_matrix`` reach the chosen model."""
    prices = get_data()

    # frequency=2 should give the default sample covariance divided by 126.
    default_cov = risk_models.sample_cov(prices)
    low_frequency_cov = risk_models.risk_matrix(prices, frequency=2)
    pd.testing.assert_frame_equal(default_cov / 126, low_frequency_cov)

    semicov = risk_models.risk_matrix(
        prices, method="semicovariance", benchmark=0.0004, frequency=52
    )
    assert semicov.shape == (20, 20)
    assert semicov.notnull().all().all()
    assert risk_models._is_positive_semidefinite(semicov)

    exp_cov = risk_models.risk_matrix(
        expected_returns.returns_from_prices(prices),
        returns_data=True,
        method="exp_cov",
        span=60,
        fix_method="diag",
    )
    assert exp_cov.shape == (20, 20)
    assert exp_cov.notnull().all().all()
    assert risk_models._is_positive_semidefinite(exp_cov)
def test_es_errors():
    """EfficientSemivariance raises or warns on unsupported calls and invalid inputs."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    rets = expected_returns.returns_from_prices(prices)

    # Returns that have not been through dropna trigger a UserWarning.
    with pytest.warns(UserWarning):
        EfficientSemivariance(mu, rets)

    rets = rets.dropna(axis=0, how="any")
    es = EfficientSemivariance(mu, rets)

    # These optimisers are expected to be unavailable on EfficientSemivariance.
    with pytest.raises(NotImplementedError):
        es.min_volatility()
    with pytest.raises(NotImplementedError):
        es.max_sharpe()

    with pytest.raises(ValueError):
        # Must be > 0
        es.max_quadratic_utility(risk_aversion=-0.01)

    with pytest.raises(ValueError):
        # Must be > 0
        es.efficient_return(target_return=-0.01)

    with pytest.raises(ValueError):
        # Must be <= max expected return
        es.efficient_return(target_return=np.abs(mu).max() + 0.01)

    with pytest.raises(AttributeError):
        # list not supported.
        EfficientSemivariance(mu, rets.to_numpy().tolist())

    # Dropping the last column leaves fewer return columns than entries in mu.
    truncated_rets = rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientSemivariance(mu, truncated_rets)
def test_cdar_example_weekly():
    """Weekly efficient-return CDaR matches reference values and a historical estimate."""
    confidence = 0.90
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(weekly_prices, frequency=52)
    weekly_rets = expected_returns.returns_from_prices(weekly_prices).dropna()

    cd = EfficientCDaR(mu, weekly_rets, beta=confidence)
    cd.efficient_return(0.21)

    expected_performance = (0.21, 0.045085)
    np.testing.assert_allclose(
        cd.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute CDaR from the drawdowns of the cumulative portfolio returns.
    reported_cdar = cd.portfolio_performance()[1]
    cumulative = (weekly_rets @ cd.weights).cumsum(0)
    drawdown = cumulative.cummax() - cumulative
    threshold = drawdown.quantile(confidence)
    historical_cdar = drawdown[drawdown > threshold].mean()
    np.testing.assert_almost_equal(historical_cdar, reported_cdar, decimal=3)