예제 #1
0
def assign_strategy(filtered_stocks: pd.DataFrame, algo: str,
                    n_stocks: int) -> tuple:
    """Select an optimisation strategy and prepare the inputs it needs.

    A random subset of ``n_stocks`` columns is drawn from ``filtered_stocks``
    (which is treated as price data). Simple returns, mean historical returns
    and a Ledoit-Wolf shrunk covariance matrix are computed from that subset.

    Args:
        filtered_stocks: data frame with one column per stock.
        algo: one of ``"hrp"``, ``"ef-sharpe"``, ``"ef-risk"`` or
            ``"ef-minvol"``.
        n_stocks: number of columns to sample from ``filtered_stocks``.

    Returns:
        A 5-tuple ``(strategy, title, kwargs, mu, s)``: the strategy callable,
        a human-readable title, the keyword arguments prepared for the
        strategy, the mean historical returns and the covariance matrix.

    Raises:
        AssertionError: if ``algo`` is not one of the supported names, or if
            the number of mean returns differs from ``n_stocks``.
    """
    sampled = filtered_stocks.sample(n=n_stocks, axis=1)
    returns = returns_from_prices(sampled, log_returns=False)
    mu = mean_historical_return(sampled)
    assert len(mu) == n_stocks
    s = CovarianceShrinkage(sampled).ledoit_wolf()

    if algo == "hrp":
        return (hrp_strategy, "Hierarchical Risk Parity", {
            'returns': returns
        }, mu, s)
    if algo == "ef-sharpe":
        return (ef_sharpe_strategy, "Efficient Frontier - max. sharpe", {
            'returns': mu,
            'cov_matrix': s
        }, mu, s)
    if algo == "ef-risk":
        # NOTE(review): 5.0 would mean 500% if the strategy expects volatility
        # as a fraction -- confirm against ef_risk_strategy.
        return (ef_risk_strategy, "Efficient Frontier - efficient risk", {
            'returns': mu,
            'cov_matrix': s,
            'target_volatility': 5.0
        }, mu, s)
    if algo == "ef-minvol":
        return (ef_minvol_strategy, "Efficient Frontier - minimum volatility",
                {
                    'returns': mu,
                    'cov_matrix': s
                }, mu, s)

    # Raised explicitly rather than via ``assert False``: assert statements
    # are stripped under ``python -O``, which would let an unknown algorithm
    # fall through and silently return None.
    raise AssertionError(f"unknown algo: {algo!r}")
예제 #2
0
    def generate_testing_set(self, stock_code):
        """Build the news testing set for one stock and write it to parquet.

        News rows between ``self.cut_off_date`` and ``self.end_date`` that
        carry ``stock_code`` in their comma-separated ``TAG`` field are
        grouped into one list of titles per (``DT``, ``PRE_DT``) pair. Each
        group is left-joined with the stock's return on ``PRE_DT`` and gets a
        binary ``<stock_code>_FLAG`` column.
        """
        news = self.get_yahoo_finance()
        news = news.loc[self.cut_off_date:self.end_date].reset_index()
        news['PRE_DT'] = news['DT'].swifter.apply(find_nearest_trading_date)
        news['DT'] = news['DT'].apply(lambda stamp: stamp.date())

        # Only rows that actually carry tags can be attributed to a stock.
        news = news[news.TAG.notna()]
        news['TAG'] = news['TAG'].apply(lambda tags: tags.split(','))
        per_tag = news.explode('TAG')
        per_tag = per_tag[per_tag.TAG == stock_code][['DT', 'TITLE', 'PRE_DT']]
        titles = per_tag.groupby(['DT', 'PRE_DT'])['TITLE'].apply(list)
        titles = titles.reset_index()

        first_day = adj_days_to_str_date(config.trading_date_list[0], -1)
        last_day = config.trading_date_list[-1]
        stock_price_data = yf.download(stock_code, first_day, last_day)
        stock_return = returns_from_prices(
            stock_price_data['Adj Close']).reset_index()
        stock_return['Date'] = stock_return['Date'].astype(str)

        merged = pd.merge(titles,
                          stock_return,
                          left_on='PRE_DT',
                          right_on='Date',
                          how='left')
        merged = merged.drop(columns=['Date'])
        merged = merged.rename(columns={'Adj Close': stock_code})

        # Flag is 0 only for strictly negative returns; every other value
        # (including NaN, for which ``x < 0`` is False) maps to 1.
        flags = merged[stock_code].apply(lambda ret: 0 if ret < 0 else 1)
        merged[stock_code + '_FLAG'] = flags

        print('Generating ' + stock_code)
        out_path = ('out/testing_set/testing_news_' + str(self.window_size) +
                    'd_' + stock_code + '.parquet')
        merged.to_parquet(out_path)
def test_cdar_example():
    """min_cdar gives valid weights and a CDaR that agrees with history."""
    beta = 0.95
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    historical_rets = expected_returns.returns_from_prices(prices).dropna()

    cd = EfficientCDaR(mu, historical_rets, beta=beta)
    weights = cd.min_cdar()

    # Weights: a dict keyed by ticker, summing to one, non-negative up to
    # a small numerical tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cd.tickers)
    np.testing.assert_almost_equal(cd.weights.sum(), 1)
    assert all(weight >= -1e-5 for weight in weights.values())

    expected_performance = (0.21502, 0.056433)
    np.testing.assert_allclose(
        cd.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute the conditional drawdown-at-risk from the realised drawdowns
    # and compare it with the optimiser's figure.
    reported_cdar = cd.portfolio_performance()[1]
    cumulative = (historical_rets @ cd.weights).cumsum(0)
    drawdown = cumulative.cummax() - cumulative
    threshold = drawdown.quantile(beta)
    empirical_cdar = drawdown[drawdown > threshold].mean()
    np.testing.assert_almost_equal(empirical_cdar, reported_cdar, decimal=3)
def test_log_returns_from_prices():
    """Log returns keep the prices' NaN count and match a known last value."""
    prices = get_data()
    nan_count_prices = prices.isnull().sum(axis=1).sum()

    log_rets = expected_returns.returns_from_prices(prices, log_returns=True)
    nan_count_returns = log_rets.isnull().sum(axis=1).sum()

    assert nan_count_returns == nan_count_prices
    last_value = log_rets.iloc[-1, -1]
    np.testing.assert_almost_equal(last_value, 0.0001682740081102576)
def test_custom_tracking_error():
    """Tracking-error weights agree between the base optimiser and the EF."""
    prices = get_data()
    all_rets = expected_returns.returns_from_prices(prices).dropna()

    # AAPL is the benchmark, so it is removed from the investable universe.
    benchmark_rets = all_rets["AAPL"]
    historical_rets = all_rets.drop("AAPL", axis=1)
    S = risk_models.sample_cov(historical_rets, returns_data=True)

    objective_kwargs = {
        "historic_returns": historical_rets,
        "benchmark_returns": benchmark_rets,
    }

    base_optimizer = BaseConvexOptimizer(
        n_assets=len(historical_rets.columns),
        tickers=list(historical_rets.columns),
        weight_bounds=(0, 1),
    )
    base_optimizer.convex_objective(
        objective_functions.ex_post_tracking_error, **objective_kwargs)
    base_weights = base_optimizer.clean_weights()

    frontier = EfficientFrontier(None, S)
    frontier.convex_objective(
        objective_functions.ex_post_tracking_error, **objective_kwargs)
    frontier_weights = frontier.clean_weights()

    assert base_weights == frontier_weights
def test_james_stein():
    """James-Stein estimates match reference values and shrink to the mean."""
    prices = get_data()
    js = expected_returns.james_stein_shrinkage(prices)

    reference_values = [
        0.25870218, 0.32318595, 0.29184719, 0.23082673, 0.41448111,
        0.19238474, 0.23175124, 0.17825652, 0.20656697, 0.13374178,
        0.16512141, 0.25217574, 0.12991287, 0.18700597, 0.21668984,
        0.3147078, 0.33948993, 0.24437593, 0.225335, 0.27014272,
    ]
    correct_mean = np.array(reference_values)
    np.testing.assert_array_almost_equal(js.values, correct_mean)

    # Test shrinkage: every estimate must lie between the asset's own
    # annualised mean (y) and the grand mean across assets (nu).
    y = expected_returns.returns_from_prices(prices).mean(axis=0) * 252
    nu = y.mean()
    pulled_down = (js <= nu) & (js >= y)
    pulled_up = (js >= nu) & (js <= y)
    assert (pulled_down | pulled_up).all()
예제 #7
0
def hrp(my_portfolio, perf=True) -> list:
    """Compute Hierarchical Risk Parity weights for ``my_portfolio``.

    Adjusted close prices are downloaded for ``my_portfolio.portfolio``
    between ``my_portfolio.start_date`` and ``my_portfolio.end_date`` (rather
    than over all available history), converted to returns and passed to
    ``HRPOpt``.

    Args:
        my_portfolio: object exposing ``portfolio``, ``start_date`` and
            ``end_date`` attributes.
        perf: when exactly ``True``, print the portfolio performance.

    Returns:
        The result of ``flatten`` applied to the cleaned weights, each
        wrapped in a one-element list, in the order ``clean_weights``
        yields them.
    """
    ohlc = yf.download(
        my_portfolio.portfolio,
        start=my_portfolio.start_date,
        end=my_portfolio.end_date,
        progress=False,
    )
    prices = ohlc["Adj Close"].dropna(how="all")
    prices = prices.filter(my_portfolio.portfolio)

    # A company may not have been public for the whole date range. Its
    # missing prices cannot be 0, so they are set to 1 instead.
    # NOTE(review): this creates an artificial jump in returns on the first
    # real price -- confirm this is acceptable.
    prices = prices.fillna(1)

    rets = expected_returns.returns_from_prices(prices)
    optimizer = HRPOpt(rets)
    optimizer.optimize()
    weights = optimizer.clean_weights()

    # One single-element list per asset; flattened just before returning.
    wrapped_weights = [[weight] for weight in weights.values()]

    if perf is True:
        optimizer.portfolio_performance(verbose=True)

    return flatten(wrapped_weights)
예제 #8
0
def test_risk_matrix_and_returns_data():
    """risk_matrix gives the same result from prices and from returns."""
    # Test the switcher method for simple calls
    prices = get_data()

    methods = {
        "sample_cov",
        "semicovariance",
        "exp_cov",
        # FIXME: this fails "min_cov_determinant",
        "ledoit_wolf",
        "ledoit_wolf_constant_variance",
        "ledoit_wolf_single_factor",
        "ledoit_wolf_constant_correlation",
        "oracle_approximating",
    }

    for method in methods:
        from_prices = risk_models.risk_matrix(prices, method=method)
        assert from_prices.shape == (20, 20)
        assert from_prices.notnull().all().all()
        assert risk_models._is_positive_semidefinite(from_prices)

        # Feeding precomputed returns must reproduce the same matrix.
        returns = expected_returns.returns_from_prices(prices)
        from_returns = risk_models.risk_matrix(returns,
                                               returns_data=True,
                                               method=method)
        pd.testing.assert_frame_equal(from_prices, from_returns)
예제 #9
0
def setup_optimisation_matrices(stocks, timeframe: Timeframe, exclude_price,
                                warning_cb):
    """Load prices for ``stocks`` and derive the matrices used for optimisation.

    Prices are fetched with ``company_prices``, gaps are back-filled by up to
    10 rows, and ``remove_bad_stocks`` is applied at both the earliest and the
    latest date. Simple returns are then computed with missing values set to
    0.0.

    Args:
        stocks: stocks to load, passed through to ``company_prices``.
        timeframe: period to load, passed through to ``company_prices``.
        exclude_price: passed through to ``remove_bad_stocks``.
        warning_cb: passed through to ``remove_bad_stocks``.

    Returns:
        ``(all_returns, stock_prices, latest_prices, first_prices)``.

    Raises:
        AssertionError: if the derived matrices have inconsistent shapes, no
            stock remains, or there are 7 or fewer rows of prices.
    """
    # ref: https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html#processing-historical-prices

    stock_prices = company_prices(stocks,
                                  timeframe,
                                  fields="last_price",
                                  missing_cb=None)
    # ``DataFrame.bfill`` is the supported spelling of
    # ``fillna(method="bfill")``, which pandas has deprecated.
    stock_prices = stock_prices.bfill(axis=0, limit=10)
    latest_date = stock_prices.index[-1]
    earliest_date = stock_prices.index[0]

    stock_prices = remove_bad_stocks(stock_prices, earliest_date,
                                     exclude_price, warning_cb)
    stock_prices = remove_bad_stocks(stock_prices, latest_date, exclude_price,
                                     warning_cb)

    latest_prices = stock_prices.loc[latest_date]
    first_prices = stock_prices.loc[earliest_date]
    all_returns = returns_from_prices(stock_prices,
                                      log_returns=False).fillna(value=0.0)

    # check that the matrices are consistent to each other
    assert stock_prices.shape[1] == latest_prices.shape[0]
    assert stock_prices.shape[1] == all_returns.shape[1]
    assert all_returns.shape[0] == stock_prices.shape[0] - 1
    assert len(stock_prices.columns) > 0  # must have at least 1 stock
    assert len(stock_prices) > 7  # and at least one trading week of data

    return all_returns, stock_prices, latest_prices, first_prices
def test_cvar_example():
    """min_cvar gives valid weights and a CVaR that agrees with history."""
    beta = 0.95
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    historical_rets = expected_returns.returns_from_prices(prices).dropna()

    cv = EfficientCVaR(mu, historical_rets, beta=beta)
    weights = cv.min_cvar()

    # Weights: a dict keyed by ticker, summing to one, non-negative up to
    # a small numerical tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cv.tickers)
    np.testing.assert_almost_equal(cv.weights.sum(), 1)
    assert all(weight >= -1e-5 for weight in weights.values())

    expected_performance = (0.17745746040573562, 0.017049502122532853)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Empirical CVaR: minus the mean of portfolio returns that fall below
    # the (1 - beta) quantile.
    reported_cvar = cv.portfolio_performance()[1]
    portfolio_rets = historical_rets @ cv.weights
    var_threshold = portfolio_rets.quantile(1 - beta)
    tail_returns = portfolio_rets[portfolio_rets < var_threshold]
    empirical_cvar = -tail_returns.mean()
    np.testing.assert_almost_equal(empirical_cvar, reported_cvar, decimal=3)
def test_returns_dataframe():
    """returns_from_prices yields a date-indexed frame of the expected shape."""
    df = get_data()
    returns_df = expected_returns.returns_from_prices(df)
    assert isinstance(returns_df, pd.DataFrame)
    assert returns_df.shape[1] == 20
    assert len(returns_df) == 7125
    # ``Index.is_all_dates`` was deprecated and later removed from pandas, so
    # check the index type directly.
    # NOTE(review): assumes get_data() yields a DatetimeIndex -- confirm.
    assert isinstance(returns_df.index, pd.DatetimeIndex)
    # No single-period return may exceed 100%.
    assert not ((returns_df > 1) & returns_df.notnull()).any().any()
def setup_efficient_semivariance(data_only=False, solver=None, verbose=False):
    """Build an EfficientSemivariance fixture from the NaN-free price data.

    Args:
        data_only: when truthy, return the raw inputs instead of an optimiser.
        solver: forwarded to ``EfficientSemivariance``.
        verbose: forwarded to ``EfficientSemivariance``.

    Returns:
        ``(mean_return, historic_returns)`` if ``data_only`` is truthy,
        otherwise an ``EfficientSemivariance`` instance built from them.
    """
    prices = get_data().dropna(axis=0, how="any")
    mean_return = expected_returns.mean_historical_return(prices,
                                                          compounding=False)
    historic_returns = returns_from_prices(prices)

    if not data_only:
        return EfficientSemivariance(mean_return,
                                     historic_returns,
                                     solver=solver,
                                     verbose=verbose)
    return mean_return, historic_returns
예제 #13
0
def hrp_portfolio(prices):
    """Solve for the Hierarchical Risk Parity portfolio.

    Arguments:
        prices (pd.DataFrame): adjusted (daily) closing prices of the assets;
            each row is a date and each column is a ticker/id.

    Returns:
        The weights produced by ``HRPOpt.hrp_portfolio`` on the returns
        derived from ``prices``.
    """
    asset_returns = expected_returns.returns_from_prices(prices)
    return HRPOpt(asset_returns).hrp_portfolio()
def test_log_returns_from_prices():
    """Log returns keep the NaN count, match a known value and the old API."""
    prices = get_data()
    nan_count_prices = prices.isnull().sum(axis=1).sum()

    log_rets = expected_returns.returns_from_prices(prices, log_returns=True)
    nan_count_returns = log_rets.isnull().sum(axis=1).sum()

    assert nan_count_returns == nan_count_prices
    last_value = log_rets.iloc[-1, -1]
    np.testing.assert_almost_equal(last_value, 0.0001682740081102576)

    # Test the deprecated function, until it is removed.
    legacy_log_rets = expected_returns.log_returns_from_prices(prices)
    np.testing.assert_allclose(legacy_log_rets, log_rets)
예제 #15
0
def get_movement():
    """Return log returns of the adjusted close, processed per stock.

    The returns are moved out of the index, a column labelled ``0`` (if any)
    is renamed to ``FLAG`` and ``Date`` is converted to strings. After that,
    ``convert_price2movement`` is called once per stock in
    ``config.stock_list``; its return value is not used, so it presumably
    updates the frame in place.
    """
    adj_close = get_stock_price_data()['Adj Close']
    stock_return = returns_from_prices(adj_close, log_returns=True)

    movement = stock_return.reset_index()
    movement = movement.rename(columns={0: 'FLAG'})
    movement['Date'] = movement['Date'].astype(str)

    for ticker in config.stock_list:
        convert_price2movement(movement, ticker)

    return movement
def test_es_example_short():
    """Shifting GOOG's returns down must not shrink its absolute weight."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    historical_rets = expected_returns.returns_from_prices(prices).dropna()

    def goog_weight(rets):
        # Market-neutral long/short optimisation for a 20% target return.
        es = EfficientSemivariance(mu, rets, weight_bounds=(-1, 1))
        return es.efficient_return(0.2, market_neutral=True)["GOOG"]

    weight_before = goog_weight(historical_rets)

    # Subtract GOOG's 75th-percentile return from every GOOG observation.
    historical_rets["GOOG"] -= historical_rets["GOOG"].quantile(0.75)
    weight_after = goog_weight(historical_rets)

    assert abs(weight_after) >= abs(weight_before)
예제 #17
0
def test_es_example_weekly():
    """Efficient-return semivariance optimisation on weekly prices."""
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(weekly_prices, frequency=52)
    historical_rets = expected_returns.returns_from_prices(
        weekly_prices).dropna()

    es = EfficientSemivariance(mu, historical_rets, frequency=52)
    es.efficient_return(0.2)

    expected_performance = (
        0.2000000562544616,
        0.07667633475531543,
        2.3475307841574087,
    )
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
예제 #18
0
def test_es_errors():
    """EfficientSemivariance warns on NaNs and rejects mismatched inputs."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    raw_rets = expected_returns.returns_from_prices(prices)

    # Returns that still contain NaN rows trigger a warning.
    with pytest.warns(UserWarning):
        EfficientSemivariance(mu, raw_rets)

    clean_rets = raw_rets.dropna(axis=0, how="any")
    assert EfficientSemivariance(mu, clean_rets)

    # Dropping the last column leaves fewer return columns than entries
    # in mu.
    truncated_rets = clean_rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientSemivariance(mu, truncated_rets)
예제 #19
0
def test_es_example_monthly():
    """Efficient-return semivariance optimisation on monthly prices."""
    monthly_prices = get_data().resample("M").first()
    mu = expected_returns.mean_historical_return(monthly_prices, frequency=12)
    historical_rets = expected_returns.returns_from_prices(
        monthly_prices).dropna()

    es = EfficientSemivariance(mu, historical_rets, frequency=12)
    es.efficient_return(0.3)

    expected_performance = (0.3, 0.04746519522734184, 5.899059271933824)
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
예제 #20
0
def assign_strategy(ld: LazyDictionary, algo: str) -> tuple:
    """Register the optimisation inputs on ``ld`` and pick a strategy.

    The covariance matrix (``"s"``) and the market-implied risk aversion
    (``"delta"``) are computed immediately. The Black-Litterman model, its
    posterior returns and covariance, the mean historical returns and the
    return series are stored as callables taking ``ld`` (presumably evaluated
    on first access by ``LazyDictionary`` -- confirm). ``ld["title"]`` is set
    to the strategy's display name.

    Args:
        ld: dictionary providing ``"filtered_stocks"``, ``"market_prices"``,
            ``"market_caps"`` and ``"returns_by"``.
        algo: one of ``"hrp"``, ``"ef-sharpe"``, ``"ef-risk"`` or
            ``"ef-minvol"``.

    Returns:
        ``(strategy, kwargs)``: the strategy callable and its keyword
        arguments.

    Raises:
        AssertionError: if ``ld`` is None or ``algo`` is not recognised.
    """
    assert ld is not None
    # use of black-litterman is based on https://github.com/robertmartin8/PyPortfolioOpt/blob/master/cookbook/4-Black-Litterman-Allocation.ipynb
    ld["s"] = CovarianceShrinkage(ld["filtered_stocks"]).ledoit_wolf()
    ld["delta"] = market_implied_risk_aversion(ld["market_prices"])

    # use BlackLitterman model to compute returns - hopefully better estimate of returns than extrapolation of historical prices
    ld["bl"] = lambda ld: BlackLittermanModel(
        ld["s"],
        pi="market",
        market_caps=ld["market_caps"],
        risk_aversion=abs(ld["delta"]),
        absolute_views={},
    )
    ld["posterior_total_returns"] = lambda ld: ld["bl"].bl_returns()
    ld["posterior_s"] = lambda ld: ld["bl"].bl_cov()
    ld["mu"] = lambda ld: mean_historical_return(ld["filtered_stocks"])
    ld["returns_from_prices"] = lambda ld: returns_from_prices(ld[
        "filtered_stocks"])

    use_bl = ld["returns_by"] != "by_prices"
    if algo == "hrp":
        # HRP only needs the return series.
        kwargs = {"returns": ld["returns_from_prices"]}
    elif use_bl:
        # Returns and covariance must come from the same model. Previously
        # the returns choice was inverted, pairing historical ``mu`` with the
        # Black-Litterman posterior covariance (and vice versa).
        kwargs = {
            "returns": ld["posterior_total_returns"],
            "cov_matrix": ld["posterior_s"],
        }
    else:
        kwargs = {"returns": ld["mu"], "cov_matrix": ld["s"]}

    if algo == "hrp":
        ld["title"] = "Hierarchical Risk Parity"
        return (hrp_strategy, kwargs)
    if algo == "ef-sharpe":
        ld["title"] = "Efficient Frontier - max. sharpe"
        return (ef_sharpe_strategy, kwargs)
    if algo == "ef-risk":
        ld["title"] = "Efficient Frontier - efficient risk"
        # NOTE(review): 5.0 would mean 500% if the strategy expects volatility
        # as a fraction -- confirm against ef_risk_strategy.
        kwargs["target_volatility"] = 5.0
        return (ef_risk_strategy, kwargs)
    if algo == "ef-minvol":
        ld["title"] = "Efficient Frontier - minimum volatility"
        return (ef_minvol_strategy, kwargs)

    # Raised explicitly rather than via ``assert False``: assert statements
    # are stripped under ``python -O``, which would silently return None.
    raise AssertionError(f"unknown algo: {algo!r}")
def test_risk_matrix_and_returns_data():
    """return_model gives the same result from prices and from returns."""
    # Test the switcher method for simple calls
    prices = get_data()
    methods = {"mean_historical_return", "ema_historical_return", "capm_return"}

    for method in methods:
        from_prices = expected_returns.return_model(prices, method=method)

        assert isinstance(from_prices, pd.Series)
        assert list(from_prices.index) == list(prices.columns)
        assert from_prices.notnull().all()
        assert from_prices.dtype == "float64"

        # Feeding precomputed returns must reproduce the same estimates.
        returns = expected_returns.returns_from_prices(prices)
        from_returns = expected_returns.return_model(
            returns, method=method, returns_data=True
        )
        pd.testing.assert_series_equal(from_prices, from_returns)
def setup_efficient_cvar(data_only=False,
                         solver=None,
                         verbose=False,
                         solver_options=None):
    """Build an EfficientCVaR fixture from the NaN-free price data.

    Args:
        data_only: when truthy, return the raw inputs instead of an optimiser.
        solver: forwarded to ``EfficientCVaR``.
        verbose: forwarded to ``EfficientCVaR``.
        solver_options: forwarded to ``EfficientCVaR``.

    Returns:
        ``(mean_return, historic_returns)`` if ``data_only`` is truthy,
        otherwise an ``EfficientCVaR`` instance built from them.
    """
    prices = get_data().dropna(axis=0, how="any")
    mean_return = expected_returns.mean_historical_return(prices)
    historic_returns = returns_from_prices(prices)

    if not data_only:
        return EfficientCVaR(
            mean_return,
            historic_returns,
            verbose=verbose,
            solver=solver,
            solver_options=solver_options,
        )
    return mean_return, historic_returns
예제 #23
0
def test_ex_post_tracking_error():
    """Tracking error is zero against the mean and grows as it is scaled."""
    prices = get_data()
    rets = returns_from_prices(prices).dropna()
    bm_rets = rets.mean(axis=1)

    # Equal weights across all columns of the price data.
    n_assets = len(prices.columns)
    equal_weights = np.ones((n_assets, )) / n_assets

    # TE with the mean should be zero
    tracking_error = objective_functions.ex_post_tracking_error(
        equal_weights, rets, bm_rets)
    np.testing.assert_almost_equal(tracking_error, 0)

    # Should increase
    previous = tracking_error
    for multiplier in range(2, 20, 4):
        scaled_benchmark = bm_rets * multiplier
        current = objective_functions.ex_post_tracking_error(
            equal_weights, rets, scaled_benchmark)
        assert current > previous
        previous = current
예제 #24
0
def test_es_example():
    """efficient_return gives valid weights and the reference performance."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    historical_rets = expected_returns.returns_from_prices(prices).dropna()

    es = EfficientSemivariance(mu, historical_rets)
    weights = es.efficient_return(0.2)

    # Weights: a dict keyed by ticker, summing to one, non-negative up to
    # a small numerical tolerance.
    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(es.tickers)
    np.testing.assert_almost_equal(es.weights.sum(), 1)
    assert all(weight >= -1e-5 for weight in weights.values())

    expected_performance = (0.20, 0.08558991313395496, 2.1030523036993265)
    np.testing.assert_allclose(
        es.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def test_cvar_example_short():
    """Market-neutral CVaR optimisation yields weights that sum to zero."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    historical_rets = expected_returns.returns_from_prices(prices).dropna()

    cv = EfficientCVaR(mu, historical_rets, weight_bounds=(-1, 1))
    weights = cv.efficient_return(0.2, market_neutral=True)

    assert isinstance(weights, dict)
    assert set(weights.keys()) == set(cv.tickers)
    np.testing.assert_almost_equal(cv.weights.sum(), 0)

    expected_performance = (0.2, 0.013406209257292611)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )
def test_cvar_errors():
    """EfficientCVaR raises or warns on unsupported or invalid usage."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    raw_rets = expected_returns.returns_from_prices(prices)

    # Returns that still contain NaN rows trigger a warning.
    with pytest.warns(UserWarning):
        EfficientCVaR(mu, raw_rets)

    historical_rets = raw_rets.dropna(axis=0, how="any")
    assert EfficientCVaR(mu, historical_rets)

    cv = setup_efficient_cvar()

    # Mean-variance style objectives are not available on this optimiser.
    for unsupported in (cv.min_volatility, cv.max_sharpe,
                        cv.max_quadratic_utility):
        with pytest.raises(NotImplementedError):
            unsupported()

    with pytest.raises(ValueError):
        # Beta must be between 0 and 1
        EfficientCVaR(mu, historical_rets, 1)

    with pytest.warns(UserWarning):
        EfficientCVaR(mu, historical_rets, 0.1)

    with pytest.raises(OptimizationError):
        # Must be <= max expected return
        infeasible = EfficientCVaR(mu, historical_rets)
        infeasible.efficient_return(target_return=np.abs(mu).max() + 0.01)

    with pytest.raises(AttributeError):
        # list not supported.
        EfficientCVaR(mu, historical_rets.to_numpy().tolist())

    # Dropping the last column leaves fewer return columns than entries
    # in mu.
    truncated_rets = historical_rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientCVaR(mu, truncated_rets)
def test_cvar_example_weekly():
    """Efficient-return CVaR on weekly prices agrees with historical CVaR."""
    beta = 0.95
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(weekly_prices, frequency=52)
    historical_rets = expected_returns.returns_from_prices(
        weekly_prices).dropna()

    cv = EfficientCVaR(mu, historical_rets, beta=beta)
    cv.efficient_return(0.2)

    expected_performance = (0.2, 0.03447723250708958)
    np.testing.assert_allclose(
        cv.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Empirical CVaR: minus the mean of portfolio returns that fall below
    # the (1 - beta) quantile.
    reported_cvar = cv.portfolio_performance()[1]
    portfolio_rets = historical_rets @ cv.weights
    var_threshold = portfolio_rets.quantile(1 - beta)
    tail_returns = portfolio_rets[portfolio_rets < var_threshold]
    empirical_cvar = -tail_returns.mean()
    np.testing.assert_almost_equal(empirical_cvar, reported_cvar, decimal=3)
예제 #28
0
def test_risk_matrix_additional_kwargs():
    """Extra keyword arguments reach the underlying risk model."""
    prices = get_data()

    def assert_valid_risk_matrix(matrix):
        # Common sanity checks for a 20-asset covariance estimate.
        assert matrix.shape == (20, 20)
        assert matrix.notnull().all().all()
        assert risk_models._is_positive_semidefinite(matrix)

    # With frequency=2 the result must equal sample_cov's default output
    # divided by 126 (presumably a default annualisation of 252 -- confirm).
    default_cov = risk_models.sample_cov(prices)
    rescaled_cov = risk_models.risk_matrix(prices, frequency=2)
    pd.testing.assert_frame_equal(default_cov / 126, rescaled_cov)

    semicov = risk_models.risk_matrix(
        prices, method="semicovariance", benchmark=0.0004, frequency=52
    )
    assert_valid_risk_matrix(semicov)

    exp_cov = risk_models.risk_matrix(
        expected_returns.returns_from_prices(prices),
        returns_data=True,
        method="exp_cov",
        span=60,
        fix_method="diag",
    )
    assert_valid_risk_matrix(exp_cov)
def test_es_errors():
    """EfficientSemivariance raises or warns on unsupported or invalid usage."""
    prices = get_data()
    mu = expected_returns.mean_historical_return(prices)
    raw_rets = expected_returns.returns_from_prices(prices)

    # Returns that still contain NaN rows trigger a warning.
    with pytest.warns(UserWarning):
        EfficientSemivariance(mu, raw_rets)

    historical_rets = raw_rets.dropna(axis=0, how="any")
    es = EfficientSemivariance(mu, historical_rets)

    # Mean-variance style objectives are not available on this optimiser.
    for unsupported in (es.min_volatility, es.max_sharpe):
        with pytest.raises(NotImplementedError):
            unsupported()

    with pytest.raises(ValueError):
        # Must be > 0
        es.max_quadratic_utility(risk_aversion=-0.01)

    with pytest.raises(ValueError):
        # Must be > 0
        es.efficient_return(target_return=-0.01)

    with pytest.raises(ValueError):
        # Must be <= max expected return
        es.efficient_return(target_return=np.abs(mu).max() + 0.01)

    with pytest.raises(AttributeError):
        # list not supported.
        EfficientSemivariance(mu, historical_rets.to_numpy().tolist())

    # Dropping the last column leaves fewer return columns than entries
    # in mu.
    truncated_rets = historical_rets.iloc[:, :-1]
    with pytest.raises(ValueError):
        EfficientSemivariance(mu, truncated_rets)
예제 #30
0
def test_cdar_example_weekly():
    """Efficient-return CDaR on weekly prices agrees with historical CDaR."""
    beta = 0.90
    weekly_prices = get_data().resample("W").first()
    mu = expected_returns.mean_historical_return(weekly_prices, frequency=52)
    historical_rets = expected_returns.returns_from_prices(
        weekly_prices).dropna()

    cd = EfficientCDaR(mu, historical_rets, beta=beta)
    cd.efficient_return(0.21)

    expected_performance = (0.21, 0.045085)
    np.testing.assert_allclose(
        cd.portfolio_performance(),
        expected_performance,
        rtol=1e-4,
        atol=1e-4,
    )

    # Recompute the conditional drawdown-at-risk from the realised drawdowns
    # and compare it with the optimiser's figure.
    reported_cdar = cd.portfolio_performance()[1]
    cumulative = (historical_rets @ cd.weights).cumsum(0)
    drawdown = cumulative.cummax() - cumulative
    threshold = drawdown.quantile(beta)
    empirical_cdar = drawdown[drawdown > threshold].mean()
    np.testing.assert_almost_equal(empirical_cdar, reported_cdar, decimal=3)