Code Example #1
    def __init__(self):
        self.weights = list()
        self.seriated_correlations = None
        self.seriated_distances = None
        self.ordered_indices = None
        self.clusters = None
        self.returns_estimator = ReturnsEstimation()
Code Example #2
File: cla.py Project: zSoftwareRepository/mlfinlab
    def __init__(self,
                 weight_bounds=(0, 1),
                 calculate_expected_returns="mean"):
        """
        Initialise the storage arrays and perform some preprocessing.

        :param weight_bounds: (tuple) a tuple specifying the lower and upper bound ranges for the portfolio weights
        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
                                        Currently supports "mean" and "exponential"
        """

        self.weight_bounds = weight_bounds
        self.calculate_expected_returns = calculate_expected_returns
        self.weights = list()
        self.lambdas = list()
        self.gammas = list()
        self.free_weights = list()
        self.expected_returns = None
        self.cov_matrix = None
        self.lower_bounds = None
        self.upper_bounds = None
        self.max_sharpe = None
        self.min_var = None
        self.efficient_frontier_means = None
        self.efficient_frontier_sigma = None
        self.returns_estimator = ReturnsEstimation()
Code Example #3
File: risk_estimators.py Project: zdutta/mlfinlab
    def semi_covariance(returns, price_data=False, threshold_return=0):
        """
        Calculates the Semi-Covariance matrix for a dataframe of asset prices or returns.

        The Semi-Covariance matrix is used to measure the portfolio's downside volatility. Usually the
        threshold return is zero, so only negative deviations contribute. The threshold can be set to a
        positive number when a required rate of return is assumed. If the threshold is above zero, the
        output measures the co-movement of returns falling below this threshold.

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimation class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns.
        :param threshold_return: (float) Required return for each period in the frequency of the input data
                                         (If the input data is daily, it's a daily threshold return).
        :return: (np.array) Semi-Covariance matrix.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimation()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Returns that are lower than the threshold
        lower_returns = returns - threshold_return < 0

        # Calculating the minimum of 0 and returns minus threshold
        min_returns = (returns - threshold_return) * lower_returns

        # Simple covariance matrix, used as a labelled template (every element is overwritten below)
        semi_covariance = returns.cov()

        # Iterating to fill elements
        for row_number in range(semi_covariance.shape[0]):
            for column_number in range(semi_covariance.shape[1]):
                # Series of returns for the element from the row and column
                row_asset = min_returns.iloc[:, row_number]
                column_asset = min_returns.iloc[:, column_number]

                # Series of element-wise products
                covariance_series = row_asset * column_asset

                # Element of the Semi-Covariance matrix
                semi_cov_element = covariance_series.sum() / min_returns.size

                # Inserting the element in the Semi-Covariance matrix
                semi_covariance.iloc[row_number,
                                     column_number] = semi_cov_element

        return semi_covariance
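A minimal usage sketch for the method above; the import path, the instance-style call, and the CSV fixture are assumptions borrowed from the test fixtures shown elsewhere on this page, and the pct_change-based returns are an assumed stand-in for ReturnsEstimation.calculate_returns:

import numpy as np
import pandas as pd
from mlfinlab.portfolio_optimization import RiskEstimators  # assumed import path

prices = pd.read_csv('test_data/stock_prices.csv', parse_dates=True, index_col='Date')
risk_est = RiskEstimators()

# Downside covariance straight from prices, with the usual zero threshold
semi_cov = risk_est.semi_covariance(prices, price_data=True, threshold_return=0)

# Equivalent vectorised computation on the clipped returns; note the divisor is
# min_returns.size (rows * columns), mirroring the loop above
threshold = 0
returns = prices.pct_change().dropna()  # assumed equivalent of calculate_returns
clipped = np.minimum(returns - threshold, 0)
semi_cov_vec = clipped.T.dot(clipped) / clipped.size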
Code Example #4
File: test_mean_variance.py Project: zdutta/mlfinlab
    def test_valuerror_with_no_asset_names(self):
        """
        Test ValueError when not supplying a list of asset names and no other input
        """

        with self.assertRaises(ValueError):
            mvo = MeanVarianceOptimisation()
            expected_returns = ReturnsEstimation().calculate_mean_historical_returns(asset_prices=self.data,
                                                                                     resample_by='W')
            covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data, resample_by='W').cov()
            mvo.allocate(expected_asset_returns=expected_returns, covariance_matrix=covariance.values)
Code Example #5
    def setUp(self):
        """
        Initialize and get the test data
        """

        # Stock prices data to test the Covariance functions
        project_path = os.path.dirname(__file__)
        data_path = project_path + '/test_data/stock_prices.csv'
        self.data = pd.read_csv(data_path, parse_dates=True, index_col="Date")

        # And series of returns
        ret_est = ReturnsEstimation()
        self.returns = ret_est.calculate_returns(self.data)
Code Example #6
File: test_hrp.py Project: zdutta/mlfinlab
    def test_hrp_with_input_as_covariance_matrix(self):
        """
        Test HRP when passing a covariance matrix as input.
        """

        hrp = HierarchicalRiskParity()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        hrp.allocate(asset_names=self.data.columns,
                     covariance_matrix=returns.cov())
        weights = hrp.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #7
    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
        Currently supports "mean" and "exponential"
        """

        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()
        self.calculate_expected_returns = calculate_expected_returns
Code Example #8
File: test_mean_variance.py Project: zdutta/mlfinlab
    def test_no_asset_names_by_passing_cov(self):
        """
        Test MVO when not supplying a list of asset names but passing covariance matrix as input
        """

        mvo = MeanVarianceOptimisation()
        expected_returns = ReturnsEstimation().calculate_exponential_historical_returns(asset_prices=self.data,
                                                                                        resample_by='W')
        covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data, resample_by='W').cov()
        mvo.allocate(expected_asset_returns=expected_returns, covariance_matrix=covariance)
        weights = mvo.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #9
File: test_mean_variance.py Project: zdutta/mlfinlab
    def test_exception_in_plotting_efficient_frontier(self):
        # pylint: disable=invalid-name, protected-access
        """
        Test raising of exception when plotting the efficient frontier.
        """

        mvo = MeanVarianceOptimisation()
        expected_returns = ReturnsEstimation().calculate_mean_historical_returns(asset_prices=self.data,
                                                                                 resample_by='W')
        covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data, resample_by='W').cov()
        plot = mvo.plot_efficient_frontier(covariance=covariance,
                                           max_return=1.0,
                                           expected_asset_returns=expected_returns)
        assert len(plot._A) == 41
Code Example #10
    def test_hcaa_with_input_as_covariance_matrix(self):
        """
        Test HCAA when passing a covariance matrix as input
        """

        hcaa = HierarchicalClusteringAssetAllocation()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        hcaa.allocate(asset_names=self.data.columns,
                      covariance_matrix=returns.cov(),
                      optimal_num_clusters=6,
                      asset_returns=returns)
        weights = hcaa.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #11
File: test_mean_variance.py Project: zdutta/mlfinlab
    def test_plotting_efficient_frontier(self):
        # pylint: disable=invalid-name, protected-access
        """
        Test the plotting of the efficient frontier.
        """

        mvo = MeanVarianceOptimisation()
        expected_returns = ReturnsEstimation().calculate_mean_historical_returns(asset_prices=self.data,
                                                                                 resample_by='W')
        covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data, resample_by='W').cov()
        plot = mvo.plot_efficient_frontier(covariance=covariance,
                                           expected_asset_returns=expected_returns)
        assert plot.axes.xaxis.label._text == 'Volatility'
        assert plot.axes.yaxis.label._text == 'Return'
        assert len(plot._A) == 100
Code Example #12
File: mean_variance.py Project: yudemeirain/mlfinlab
    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
        Currently supports "mean" and "exponential"
        """

        self.weights = list()
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.weight_bounds = None
Code Example #13
    def test_mvo_with_input_as_returns_and_covariance(self):
        # pylint: disable=invalid-name, bad-continuation
        """
        Test MVO when we pass expected returns and covariance matrix as input
        """

        mvo = MeanVarianceOptimisation()
        expected_returns = ReturnsEstimation().calculate_mean_historical_returns(asset_prices=self.data, resample_by='W')
        covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data, resample_by='W').cov()
        mvo.allocate(covariance_matrix=covariance,
                     expected_asset_returns=expected_returns,
                     asset_names=self.data.columns)
        weights = mvo.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #14
File: risk_estimators.py Project: zdutta/mlfinlab
    def minimum_covariance_determinant(returns,
                                       price_data=False,
                                       assume_centered=False,
                                       support_fraction=None,
                                       random_state=None):
        """
        Calculates the Minimum Covariance Determinant for a dataframe of asset prices or returns.

        This function is a wrapper around scikit-learn's MinCovDet (MCD) class. According to the
        scikit-learn User Guide on Covariance estimation:

        "The idea is to find a given proportion (h) of “good” observations that are not outliers
        and compute their empirical covariance matrix. This empirical covariance matrix is then
        rescaled to compensate for the performed selection of observations".

        Link to the documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.covariance.MinCovDet.html

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimation class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns.
        :param assume_centered: (bool) Flag for data with mean significantly equal to zero
                                       (Read the documentation for MinCovDet class).
        :param support_fraction: (float) Values between 0 and 1. The proportion of points to be included in the support
                                         of the raw MCD estimate (Read the documentation for MinCovDet class).
        :param random_state: (int) Seed used by the random number generator.
        :return: (np.array) Estimated robust covariance matrix.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimation()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Calculating the covariance matrix
        cov_matrix = MinCovDet(
            assume_centered=assume_centered,
            support_fraction=support_fraction,
            random_state=random_state).fit(returns).covariance_

        return cov_matrix
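Because the method above is a thin wrapper, the underlying scikit-learn call can be sketched directly on synthetic data (no mlfinlab dependency; the contamination pattern is purely illustrative):

import numpy as np
from sklearn.covariance import MinCovDet

rng = np.random.RandomState(42)
returns = rng.normal(0, 0.01, size=(250, 4))  # 250 observations, 4 assets
returns[::50] += 0.2                          # contaminate a few rows with outliers

# MCD fits on the "good" subset of observations and rescales the estimate,
# so the outlier rows barely move the resulting covariance matrix
mcd = MinCovDet(support_fraction=0.9, random_state=0).fit(returns)
robust_cov = mcd.covariance_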
Code Example #15
File: risk_estimators.py Project: zdutta/mlfinlab
    def exponential_covariance(returns, price_data=False, window_span=60):
        """
        Calculates the Exponentially-weighted Covariance matrix for a dataframe of asset prices or returns.

        For each pair of assets, it computes the series of covariance terms and then takes the last value
        of the exponentially weighted moving average of that series as the corresponding matrix element.

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimation class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns.
        :param window_span: (int) Used to specify decay in terms of span for the exponentially-weighted series.
        :return: (np.array) Exponentially-weighted Covariance matrix.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimation()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Simple covariance matrix
        cov_matrix = returns.cov()

        # Iterating to fill elements
        for row_number in range(cov_matrix.shape[0]):
            for column_number in range(cov_matrix.shape[1]):
                # Series of returns for the element from the row and column
                row_asset = returns.iloc[:, row_number]
                column_asset = returns.iloc[:, column_number]

                # Series of covariance
                covariance_series = (row_asset - row_asset.mean()) * (
                    column_asset - column_asset.mean())

                # Exponentially weighted moving average series
                ew_ma = covariance_series.ewm(span=window_span).mean()

                # Using the most recent value of the EWMA series as the Exponential Covariance element
                cov_matrix.iloc[row_number, column_number] = ew_ma.iloc[-1]

        return cov_matrix
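For comparison, pandas offers a closely related estimate out of the box. A sketch on synthetic returns; note that the loop above centres each series on its full-sample mean while DataFrame.ewm(...).cov() uses exponentially weighted means, so the two agree only approximately:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
returns = pd.DataFrame(rng.normal(0, 0.01, size=(250, 3)), columns=['A', 'B', 'C'])

# Full history of EWM covariance matrices, indexed by (date, asset)
ewm_cov = returns.ewm(span=60).cov()

# The last cross-section is the analogue of the matrix built by the loop above
last_cov = ewm_cov.loc[returns.index[-1]]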
Code Example #16
    def test_cla_with_input_as_returns_and_covariance(self):
        # pylint: disable=invalid-name
        """
        Test CLA when we pass expected returns and covariance matrix as input
        """

        cla = CLA()
        expected_returns = ReturnsEstimation().calculate_mean_historical_returns(asset_prices=self.data)
        covariance = ReturnsEstimation().calculate_returns(asset_prices=self.data).cov()
        cla.allocate(covariance_matrix=covariance,
                     expected_asset_returns=expected_returns,
                     asset_names=self.data.columns)
        weights = cla.weights.values
        weights[weights <= 1e-15] = 0  # Convert very very small numbers to 0
        for turning_point in weights:
            assert (turning_point >= 0).all()
            assert len(turning_point) == self.data.shape[1]
            np.testing.assert_almost_equal(np.sum(turning_point), 1)
Code Example #17
File: mean_variance.py Project: zdutta/mlfinlab
    def __init__(self, calculate_expected_returns='mean', risk_free_rate=0.03):
        """
        Constructor.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports "mean" and "exponential".
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        """

        self.weights = list()
        self.asset_names = None
        self.num_assets = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.risk_estimators = RiskEstimators()
        self.weight_bounds = (0, 1)
        self.risk_free_rate = risk_free_rate
Code Example #18
File: test_hrp.py Project: zdutta/mlfinlab
    def test_valuerror_with_no_asset_names(self):
        """
        Test ValueError when not supplying a list of asset names and no other input.
        """

        with self.assertRaises(ValueError):
            hrp = HierarchicalRiskParity()
            returns = ReturnsEstimation().calculate_returns(
                asset_prices=self.data)
            hrp.allocate(asset_returns=returns.values)
Code Example #19
File: test_hcaa.py Project: vroomzel/mlfinlab
    def test_valuerror_with_no_asset_names(self):
        """
        Test ValueError when not supplying a list of asset names and no other input
        """

        with self.assertRaises(ValueError):
            hcaa = HierarchicalClusteringAssetAllocation()
            returns = ReturnsEstimation().calculate_returns(
                asset_prices=self.data)
            hcaa.allocate(asset_returns=returns.values, optimal_num_clusters=6)
Code Example #20
    def test_hcaa_sharpe_ratio_alloc_factor_less_than_one(self):
        # pylint: disable=invalid-name
        """
        Test the condition where the allocation factor calculated for the Sharpe ratio metric is less than 0
        or greater than 1 (in which case variance is used as the metric instead).
        """

        hcaa = HierarchicalClusteringAssetAllocation()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        expected_returns = returns.mean()
        expected_returns[0] = -10000
        hcaa.allocate(expected_asset_returns=expected_returns,
                      asset_names=self.data.columns,
                      covariance_matrix=returns.corr(),
                      optimal_num_clusters=5,
                      allocation_metric='sharpe_ratio')
        weights = hcaa.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #21
    def __init__(self,
                 calculate_expected_returns='mean',
                 confidence_level=0.05):
        """
        Initialise.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                        Currently supports "mean" and "exponential"
        :param confidence_level: (float) The confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk.
        """

        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.cluster_children = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()
        self.calculate_expected_returns = calculate_expected_returns
        self.confidence_level = confidence_level
Code Example #22
File: test_tic.py Project: zdutta/mlfinlab
    def setUp(self):
        """
        Initialize and load data
        """

        project_path = os.path.dirname(__file__)

        # Loading the price series of ETFs
        price_data_path = project_path + '/test_data/stock_prices.csv'
        self.price_data = pd.read_csv(price_data_path,
                                      parse_dates=True,
                                      index_col="Date")

        # Transforming series of prices to series of returns
        ret_est = ReturnsEstimation()
        self.returns_data = ret_est.calculate_returns(self.price_data)

        # Loading the classification tree of ETFs
        classification_tree_path = project_path + '/test_data/classification_tree.csv'
        self.classification_tree = pd.read_csv(classification_tree_path)
Code Example #23
File: test_hcaa.py Project: vroomzel/mlfinlab
    def test_no_asset_names_with_asset_returns(self):
        """
        Test HCAA when not supplying a list of asset names and when the user passes asset_returns.
        """

        hcaa = HierarchicalClusteringAssetAllocation()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        hcaa.allocate(asset_returns=returns, optimal_num_clusters=6)
        weights = hcaa.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #24
File: test_hrp.py Project: zdutta/mlfinlab
    def test_no_asset_names_with_asset_returns(self):
        """
        Test HRP when not supplying a list of asset names and when the user passes asset_returns.
        """

        hrp = HierarchicalRiskParity()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        hrp.allocate(asset_returns=returns)
        weights = hrp.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #25
    def test_hcaa_with_input_as_returns(self):
        """
        Test HCAA when passing asset returns dataframe as input
        """

        hcaa = HierarchicalClusteringAssetAllocation()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        hcaa.allocate(asset_returns=returns, asset_names=self.data.columns)
        weights = hcaa.weights.values[0]
        assert (weights >= 0).all()
        assert len(weights) == self.data.shape[1]
        np.testing.assert_almost_equal(np.sum(weights), 1)
Code Example #26
File: risk_estimators.py Project: zdutta/mlfinlab
    def empirical_covariance(returns, price_data=False, assume_centered=False):
        """
        Calculates the Maximum likelihood covariance estimator for a dataframe of asset prices or returns.

        This function is a wrapper around scikit-learn's EmpiricalCovariance class. According to the
        scikit-learn User Guide on Covariance estimation:

        "The covariance matrix of a data set is known to be well approximated by the classical maximum
        likelihood estimator, provided the number of observations is large enough compared to the number
        of features (the variables describing the observations). More precisely, the Maximum Likelihood
        Estimator of a sample is an unbiased estimator of the corresponding population’s covariance matrix".

        Link to the documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.covariance.EmpiricalCovariance.html

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimation class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns.
        :param assume_centered: (bool) Flag for data with mean almost, but not exactly zero
                                       (Read documentation for EmpiricalCovariance class).
        :return: (np.array) Estimated covariance matrix.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimation()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Calculating the covariance matrix
        cov_matrix = EmpiricalCovariance(
            assume_centered=assume_centered).fit(returns).covariance_

        return cov_matrix
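A quick cross-check of the wrapped estimator on synthetic data: the maximum-likelihood estimate divides by n rather than n - 1, so it should coincide with np.cov using ddof=0:

import numpy as np
from sklearn.covariance import EmpiricalCovariance

rng = np.random.RandomState(0)
returns = rng.normal(0, 0.01, size=(250, 4))  # 250 observations, 4 assets

ml_cov = EmpiricalCovariance(assume_centered=False).fit(returns).covariance_

# The MLE uses a denominator of n, matching the biased sample covariance
assert np.allclose(ml_cov, np.cov(returns, rowvar=False, ddof=0))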
Code Example #27
File: test_hrp.py Project: zdutta/mlfinlab
    def test_hrp_with_input_as_distance_matrix(self):
        """
        Test HRP when passing a distance matrix as input.
        """

        hrp = HierarchicalRiskParity()
        returns = ReturnsEstimation().calculate_returns(asset_prices=self.data)
        covariance = returns.cov()
        d_matrix = np.zeros_like(covariance)
        diagonal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagonal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr,
                            index=covariance.columns,
                            columns=covariance.columns)
        distance_matrix = np.sqrt((1 - corr).round(5) / 2)
        hrp.allocate(asset_names=self.data.columns,
                     covariance_matrix=covariance,
                     distance_matrix=distance_matrix)
        weights = hrp.weights.values[0]
        self.assertTrue((weights >= 0).all())
        self.assertTrue(len(weights) == self.data.shape[1])
        self.assertAlmostEqual(np.sum(weights), 1)
Code Example #28
File: mean_variance.py Project: yudemeirain/mlfinlab
class MeanVarianceOptimisation:
    """
    This class implements some classic mean-variance optimisation techniques for calculating the efficient frontier solutions.
    With the help of quadratic optimisers, users can generate optimal portfolios for different objective functions. Currently
    solutions to the following portfolios can be generated:
        1. Inverse Variance
        2. Maximum Sharpe
        3. Minimum Volatility
        4. Efficient Risk
    """
    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
        Currently supports "mean" and "exponential"
        """

        self.weights = list()
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.weight_bounds = None

    def allocate(self,
                 asset_names,
                 asset_prices=None,
                 expected_asset_returns=None,
                 covariance_matrix=None,
                 solution='inverse_variance',
                 risk_free_rate=0.05,
                 target_return=0.2,
                 weight_bounds=(0, 1),
                 resample_by=None):
        # pylint: disable=invalid-name, too-many-branches, bad-continuation
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (daily close)
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns (sigma)
        :param solution: (str) the type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe and
                               efficient_risk
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param target_return: (float) target return of the portfolio
        :param weight_bounds: (dict/tuple) can be either a single tuple of upper and lower bounds
                                          for all portfolio weights or a dictionary mapping of individual asset indices
                                          to tuples of upper and lower bounds. Those indices which do not have any mapping
                                          will have a (0, 1) default bound.
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc. Defaults to
                                  None for no resampling
        """

        if asset_prices is None and expected_asset_returns is None and covariance_matrix is None:
            raise ValueError(
                "You need to supply either raw prices or expected returns "
                "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        # Weight bounds
        self.weight_bounds = weight_bounds

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )
        expected_asset_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices, resample_by=resample_by)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix,
                           index=asset_names,
                           columns=asset_names)

        if solution == 'inverse_variance':
            self.weights = self._inverse_variance(covariance=cov)
        elif solution == 'min_volatility':
            self.weights, self.portfolio_risk = self._min_volatility(
                covariance=cov, num_assets=len(asset_names))
        elif solution == 'max_sharpe':
            self.weights, self.portfolio_risk, self.portfolio_return = self._max_sharpe(
                covariance=cov,
                expected_returns=expected_asset_returns,
                risk_free_rate=risk_free_rate,
                num_assets=len(asset_names))
        elif solution == 'efficient_risk':
            self.weights, self.portfolio_risk, self.portfolio_return = self._min_volatility_for_target_return(
                covariance=cov,
                expected_returns=expected_asset_returns,
                target_return=target_return,
                num_assets=len(asset_names))
        else:
            raise ValueError(
                "Unknown solution string specified. Supported solutions - "
                "inverse_variance, min_volatility, max_sharpe and efficient_risk."
            )

        # Round weights which are very very small negative numbers (e.g. -4.7e-16) to 0
        negative_weight_indices = np.argwhere(self.weights < 0)
        self.weights[negative_weight_indices] = np.round(
            self.weights[negative_weight_indices], 3)

        # Calculate the portfolio risk and return if it has not been calculated
        if self.portfolio_risk is None:
            self.portfolio_risk = np.dot(self.weights,
                                         np.dot(cov.values, self.weights.T))
        if self.portfolio_return is None:
            self.portfolio_return = np.dot(self.weights,
                                           expected_asset_returns)
        self.portfolio_sharpe_ratio = (
            (self.portfolio_return - risk_free_rate) /
            (self.portfolio_risk**0.5))

        self.weights = pd.DataFrame(self.weights)
        self.weights.index = asset_names
        self.weights = self.weights.T

    @staticmethod
    def _inverse_variance(covariance):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :return: (np.array) array of portfolio weights
        """

        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        return ivp

    def _min_volatility(self, covariance, num_assets):
        """
        Compute minimum volatility portfolio allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float) portfolio weights and risk value
        """

        weights = cp.Variable(num_assets)
        weights.value = np.array([1 / num_assets] * num_assets)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        if isinstance(self.weight_bounds, dict):
            asset_indices = list(range(num_assets))
            for asset_index in asset_indices:
                lower_bound, upper_bound = self.weight_bounds.get(
                    asset_index, (0, 1))
                allocation_constraints.extend([
                    weights[asset_index] >= lower_bound,
                    weights[asset_index] <= min(upper_bound, 1)
                ])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')
        return weights.value, risk.value**0.5

    def _max_sharpe(self, covariance, expected_returns, risk_free_rate,
                    num_assets):
        # pylint: disable=invalid-name
        """
        Compute maximum Sharpe portfolio allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, risk value and return value
        """

        y = cp.Variable(num_assets)
        y.value = np.array([1 / num_assets] * num_assets)
        kappa = cp.Variable(1)
        risk = cp.quad_form(y, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa, kappa >= 0
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                y >= kappa * self.weight_bounds[0],
                y <= kappa * self.weight_bounds[1]
            ])
        if isinstance(self.weight_bounds, dict):
            asset_indices = list(range(num_assets))
            for asset_index in asset_indices:
                lower_bound, upper_bound = self.weight_bounds.get(
                    asset_index, (0, 1))
                allocation_constraints.extend([
                    y[asset_index] >= kappa * lower_bound,
                    y[asset_index] <= kappa * upper_bound
                ])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')
        weights = y.value / kappa.value
        portfolio_return = (expected_returns.T @ weights)[0]
        return weights, risk.value**0.5, portfolio_return

    def _min_volatility_for_target_return(self, covariance, expected_returns,
                                          target_return, num_assets):
        """
        Calculate minimum volatility portfolio for a given target return.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param target_return: (float) target return of the portfolio
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, risk value and return value
        """

        weights = cp.Variable(num_assets)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
            (expected_returns.T @ weights)[0] == target_return,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        if isinstance(self.weight_bounds, dict):
            asset_indices = list(range(num_assets))
            for asset_index in asset_indices:
                lower_bound, upper_bound = self.weight_bounds.get(
                    asset_index, (0, 1))
                allocation_constraints.extend([
                    weights[asset_index] >= lower_bound,
                    weights[asset_index] <= min(upper_bound, 1)
                ])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')
        return weights.value, risk.value**0.5, target_return

    def plot_efficient_frontier(self,
                                covariance,
                                expected_asset_returns,
                                num_assets,
                                min_return=0,
                                max_return=0.4,
                                risk_free_rate=0.05):
        # pylint: disable=bad-continuation, broad-except
        """
        Plot the Markowitz efficient frontier.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param num_assets: (int) number of assets in the portfolio
        :param min_return: (float) minimum target return
        :param max_return: (float) maximum target return
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        """

        expected_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            _, risk, _ = self._min_volatility_for_target_return(
                covariance=covariance,
                expected_returns=expected_returns,
                target_return=portfolio_return,
                num_assets=num_assets)
            volatilities.append(risk)
            returns.append(portfolio_return)
            # risk here is already a volatility (the helper returns risk.value**0.5)
            sharpe_ratios.append(
                (portfolio_return - risk_free_rate) / (risk + 1e-16))
        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))
        figure = plt.scatter(volatilities,
                             returns,
                             c=sharpe_ratios,
                             cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure
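A usage sketch for the class above, mirroring the test fixtures earlier on this page (the CSV path is an assumption taken from those setUp methods):

import pandas as pd

prices = pd.read_csv('test_data/stock_prices.csv', parse_dates=True, index_col='Date')

mvo = MeanVarianceOptimisation(calculate_expected_returns='mean')
mvo.allocate(asset_names=prices.columns,
             asset_prices=prices,
             solution='max_sharpe',
             risk_free_rate=0.05)

print(mvo.weights)                  # one row of long-only weights summing to 1
print(mvo.portfolio_sharpe_ratio)   # computed from the solved risk and return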
Code Example #29
File: hrp.py Project: tenterfieldvietthai/mlfinlab
class HierarchicalRiskParity:
    """
    This class implements the Hierarchical Risk Parity algorithm described in the following paper: `López de Prado, Marcos,
    Building Diversified Portfolios that Outperform Out-of-Sample (May 23, 2016). Journal of Portfolio Management,
    2016 <https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2708678>`_. The code is reproduced with modification from his book:
    Advances in Financial Machine Learning, Chapter 16.
    By replacing the exact analytical approach to weight calculation with an approximate, machine-learning-based one
    (hierarchical tree clustering), Hierarchical Risk Parity produces weights that are robust to random shocks in the
    stock market. Moreover, earlier algorithms like CLA require inverting the covariance matrix, a highly unstable
    operation whose output can change dramatically with small changes in the input. By removing this dependence on
    matrix inversion entirely, the Hierarchical Risk Parity algorithm is fast, robust and flexible.
    """
    def __init__(self):
        self.weights = list()
        self.seriated_distances = None
        self.seriated_correlations = None
        self.ordered_indices = None
        self.clusters = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 distance_matrix=None,
                 side_weights=None,
                 linkage_method='single',
                 resample_by=None,
                 use_shrinkage=False):
        # pylint: disable=invalid-name, too-many-branches
        """
        Calculate asset allocations using HRP algorithm.

        :param asset_names: (list) A list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) A dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.Dataframe/numpy matrix) User supplied matrix of asset returns
        :param covariance_matrix: (pd.Dataframe/numpy matrix) User supplied covariance matrix of asset returns
        :param distance_matrix: (pd.Dataframe/numpy matrix) User supplied distance matrix
        :param side_weights: (pd.Series/numpy matrix) With asset_names in index and value 1 for Buy, -1 for Sell
                                                      (default 1 for all)
        :param linkage_method: (str) Type of linkage used for Hierarchical Clustering, e.g. single, average, complete
        :param resample_by: (str) Specifies how to resample the prices - weekly, daily, monthly etc. Defaults to
                                  None for no resampling
        :param use_shrinkage: (bool) Specifies whether to shrink the covariances
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError(
                "You need to supply either raw prices or returns or a covariance matrix of asset returns"
            )

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(
                    asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None and covariance_matrix is None:
            asset_returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices, resample_by=resample_by)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            if use_shrinkage:
                covariance_matrix = self._shrink_covariance(
                    asset_returns=asset_returns)
            else:
                covariance_matrix = asset_returns.cov()
        covariance_matrix = pd.DataFrame(covariance_matrix,
                                         index=asset_names,
                                         columns=asset_names)

        # Calculate correlation and distance from covariance matrix
        correlation_matrix = self._cov2corr(covariance=covariance_matrix)
        if distance_matrix is None:
            distance_matrix = np.sqrt((1 - correlation_matrix).round(5) / 2)
        distance_matrix = pd.DataFrame(distance_matrix,
                                       index=asset_names,
                                       columns=asset_names)

        # Step-1: Tree Clustering
        self.clusters = self._tree_clustering(distance=distance_matrix,
                                              method=linkage_method)

        # Step-2: Quasi-Diagonalization
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(
            num_assets, 2 * num_assets - 2)
        self.seriated_distances, self.seriated_correlations = self._get_seriated_matrix(
            assets=asset_names,
            distance=distance_matrix,
            correlation=correlation_matrix)

        if side_weights is None:
            side_weights = pd.Series([1] * num_assets, index=asset_names)
        side_weights = pd.Series(side_weights, index=asset_names)

        # Step-3: Recursive Bisection
        self._recursive_bisection(covariance=covariance_matrix,
                                  assets=asset_names,
                                  side_weights=side_weights)

    @staticmethod
    def _tree_clustering(distance, method='single'):
        """
        Perform the traditional hierarchical tree clustering.

        :param distance: (pd.Dataframe) Distance matrix of the assets
        :param method: (str) The type of linkage used for clustering
        :return: (np.array) Hierarchical clusters encoded as a linkage matrix
        """
        clusters = linkage(squareform(distance.values), method=method)
        return clusters

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) The total number of assets
        :param curr_index: (int) Current index
        :return: (list) The assets rearranged according to hierarchical clustering
        """

        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) +
                self._quasi_diagnalization(num_assets, right))

    def _get_seriated_matrix(self, assets, distance, correlation):
        """
        Based on the quasi-diagonalization, reorder the original distance matrix so that assets within
        the same cluster are grouped together.

        :param assets: (list) Asset names in the portfolio
        :param distance: (pd.Dataframe) Distance values between asset returns
        :param correlation: (pd.Dataframe) Correlations between asset returns
        :return: (np.array) Re-arranged distance matrix based on tree clusters
        """

        ordering = assets[self.ordered_indices]
        seriated_distances = distance.loc[ordering, ordering]
        seriated_correlations = correlation.loc[ordering, ordering]
        return seriated_distances, seriated_correlations

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.Dataframe) Covariance matrix of assets
        :return: (list) Inverse variance weight values
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.Dataframe) Covariance matrix of assets
        :param cluster_indices: (list) Asset indices for the cluster
        :return: (float) Variance of the cluster
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(
            covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _recursive_bisection(self, covariance, assets, side_weights):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param covariance: (pd.Dataframe) The covariance matrix
        :param assets: (list) Asset names in the portfolio
        :param side_weights: (pd.Series) Side weight for each asset: 1 for Buy, -1 for Sell
        """

        self.weights = pd.Series(1, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            clustered_alphas = [
                cluster[start:end] for cluster in clustered_alphas
                for start, end in ((0, len(cluster) // 2), (len(cluster) // 2,
                                                            len(cluster)))
                if len(cluster) > 1
            ]

            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Get left and right cluster variances and calculate allocation factor
                left_cluster_variance = self._get_cluster_variance(
                    covariance, left_cluster)
                right_cluster_variance = self._get_cluster_variance(
                    covariance, right_cluster)
                alloc_factor = 1 - left_cluster_variance / (
                    left_cluster_variance + right_cluster_variance)

                # Assign weights to each sub-cluster
                self.weights[left_cluster] *= alloc_factor
                self.weights[right_cluster] *= 1 - alloc_factor

        # Assign actual asset values to weight index
        self.weights.index = assets[self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)

        # Build Long/Short portfolio if needed
        short_ptf = side_weights[side_weights == -1].index
        buy_ptf = side_weights[side_weights == 1].index
        if not short_ptf.empty:
            # Short half size
            self.weights.loc[short_ptf] /= self.weights.loc[short_ptf].sum().values[0]
            self.weights.loc[short_ptf] *= -0.5
            # Buy other half
            self.weights.loc[buy_ptf] /= self.weights.loc[buy_ptf].sum().values[0]
            self.weights.loc[buy_ptf] *= 0.5
        self.weights = self.weights.T

    def plot_clusters(self, assets):
        """
        Plot a dendrogram of the hierarchical clusters.

        :param assets: (list) Asset names in the portfolio
        :return: (dict) Dendrogram
        """

        dendrogram_plot = dendrogram(self.clusters, labels=assets)
        return dendrogram_plot

    @staticmethod
    def _shrink_covariance(asset_returns):
        """
        Regularise/Shrink the asset covariances.

        :param asset_returns: (pd.Dataframe) Asset returns
        :return: (np.array) Shrunk covariance matrix of asset returns
        """

        oas = OAS()
        oas.fit(asset_returns)
        shrunk_covariance = oas.covariance_
        return shrunk_covariance

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.Dataframe) Asset returns covariances
        :return: (pd.Dataframe) Correlations between asset returns
        """

        d_matrix = np.zeros_like(covariance)
        diagonal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagonal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr,
                            index=covariance.columns,
                            columns=covariance.columns)
        return corr
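A usage sketch for the class above, again assuming the stock_prices.csv fixture used by the tests on this page:

import pandas as pd

prices = pd.read_csv('test_data/stock_prices.csv', parse_dates=True, index_col='Date')

hrp = HierarchicalRiskParity()
hrp.allocate(asset_prices=prices, linkage_method='single')

print(hrp.weights)                        # one row of weights summing to 1
hrp.plot_clusters(assets=prices.columns)  # matplotlib dendrogram of the asset hierarchy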
Code Example #30
class HierarchicalClusteringAssetAllocation:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended components mentioned in the
    following papers: `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio (August 23,
    2018). <https://ssrn.com/abstract=3237540>`_; and `Raffinot, Thomas, Hierarchical Clustering Based Asset Allocation (May 2017)
    <https://ssrn.com/abstract=2840729>`_;

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for assigning weights, the HERC
    algorithm proposed by Raffinot allows investors to use other risk metrics like Expected Shortfall, Sharpe Ratio and
    Conditional Drawdown. Furthermore, it is flexible enough to be easily extended with custom risk measures.
    """

    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
        Currently supports "mean" and "exponential"
        """

        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()
        self.calculate_expected_returns = calculate_expected_returns

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the cluster inertia (within cluster sum-of-squares).

        :param labels: (list) cluster labels
        :param asset_returns: (pd.DataFrame) historical asset returns
        :return: (float) cluster inertia value
        """

        unique_labels = np.unique(labels)
        inertia = [np.mean(pairwise_distances(asset_returns[:, labels == label])) for label in unique_labels]
        inertia = np.log(np.sum(inertia))
        return inertia
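
    # Note: `pairwise_distances` is assumed to be sklearn.metrics.pairwise_distances. For
    # each cluster, the columns of `asset_returns` are restricted to that cluster's assets
    # and the mean pairwise distance between the (row-wise) observations is taken; the log
    # of the sum across clusters is the inertia value compared by the Gap statistic below.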

    def _get_optimal_number_of_clusters(self,
                                        correlation,
                                        asset_returns,
                                        num_reference_datasets=5,
                                        max_number_of_clusters=10):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        :param correlation: (np.array) matrix of asset correlations
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param num_reference_datasets: (int) the number of reference datasets to generate for calculating expected inertia
        :param max_number_of_clusters: (int) the maximum number of clusters to check for finding the optimal value
        :return: (int) the optimal number of clusters
        """

        cluster_func = AgglomerativeClustering(affinity='precomputed', linkage='single')
        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        gap_values = []
        for num_clusters in range(1, max_number_of_clusters + 1):
            cluster_func.n_clusters = num_clusters

            # Calculate expected inertia from reference datasets
            reference_inertias = []
            for _ in range(num_reference_datasets):

                # Generate reference returns from uniform distribution and calculate the distance matrix.
                reference_asset_returns = pd.DataFrame(np.random.rand(*asset_returns.shape))
                reference_correlation = np.array(reference_asset_returns.corr())
                reference_distance_matrix = np.sqrt(2 * (1 - reference_correlation).round(5))

                reference_cluster_assignments = cluster_func.fit_predict(reference_distance_matrix)
                inertia = self._compute_cluster_inertia(reference_cluster_assignments, reference_asset_returns.values)
                reference_inertias.append(inertia)
            expected_inertia = np.mean(reference_inertias)

            # Calculate inertia from the original data
            original_cluster_assignments = cluster_func.fit_predict(original_distance_matrix)
            inertia = self._compute_cluster_inertia(original_cluster_assignments, asset_returns.values)

            # Calculate the gap statistic
            gap = expected_inertia - inertia
            gap_values.append(gap)

        # gap_values[i] corresponds to (i + 1) clusters, so shift the argmax by one
        return 1 + np.argmax(gap_values)
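
    # Note: this mirrors the Gap statistic of Tibshirani, Walther and Hastie (2001): the
    # inertia observed on the real data is compared with the expected inertia under a
    # uniform reference distribution, and the cluster count with the largest gap wins.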

    @staticmethod
    def _tree_clustering(correlation, num_clusters):
        """
        Perform agglomerative clustering on the current portfolio.

        :param correlation: (np.array) matrix of asset correlations
        :param num_clusters: (int) the number of clusters
        :return: (np.array) children of each non-leaf node of the hierarchical tree
        """

        cluster_func = AgglomerativeClustering(n_clusters=num_clusters,
                                               affinity='precomputed',
                                               linkage='single')
        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        cluster_func.fit(distance_matrix)
        return cluster_func.children_
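
    # Note: d = sqrt(2 * (1 - rho)) is the usual correlation-to-distance transform in the
    # HRP/HERC literature (rho = 1 maps to d = 0, rho = -1 to d = 2). Also, recent
    # scikit-learn releases rename AgglomerativeClustering's `affinity` keyword to
    # `metric`, so the calls above may need adjusting depending on the installed version.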

    def _quasi_diagonalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to the hierarchical tree clustering order.

        :param num_assets: (int) the total number of assets
        :param curr_index: (int) current index
        :return: (list) the assets rearranged according to hierarchical clustering
        """

        # Leaf node: indices below num_assets refer to the original assets
        if curr_index < num_assets:
            return [curr_index]

        # Internal node: recurse into the left and right children of the merge
        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagonalization(num_assets, left) + self._quasi_diagonalization(num_assets, right))
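
    # Note: this follows scikit-learn's `children_` convention -- indices below num_assets
    # are leaves (original assets), while index i >= num_assets refers to the cluster
    # merged at row (i - num_assets); the root of the tree is node 2 * num_assets - 2.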

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :return: (np.array) inverse variance weight values
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w
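
    # Note: this computes w_i = (1 / sigma_i^2) / sum_j (1 / sigma_j^2); only the diagonal
    # of the covariance matrix is used, so correlations within the cluster are ignored.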

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) variance of the cluster
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _get_cluster_sharpe_ratio(self, expected_asset_returns, covariance, cluster_indices):
        """
        Calculate cluster Sharpe Ratio.

        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) sharpe ratio of the cluster
        """

        # Cast to an array so the list of cluster indices can be used for fancy indexing
        cluster_expected_returns = np.array(expected_asset_returns)[cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        cluster_sharpe_ratio = (parity_w @ cluster_expected_returns) / np.sqrt(cluster_variance)
        return cluster_sharpe_ratio
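
    # Note: the cluster Sharpe ratio is (w' mu) / sqrt(w' Sigma w) with w the
    # inverse-variance weights; no risk-free rate is subtracted in this implementation.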

    def _get_cluster_expected_shortfall(self, asset_returns, covariance, confidence_level, cluster_indices):
        """
        Calculate cluster expected shortfall.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) expected shortfall of the cluster
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_expected_shortfall = self.risk_metrics.calculate_expected_shortfall(returns=portfolio_returns,
                                                                                    confidence_level=confidence_level)
        return cluster_expected_shortfall

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns, covariance, confidence_level, cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) CDD of the cluster
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_conditional_drawdown = self.risk_metrics.calculate_conditional_drawdown_risk(returns=portfolio_returns,
                                                                                             confidence_level=confidence_level)
        return cluster_conditional_drawdown
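
    # Note: both tail-risk helpers above evaluate their metric on the inverse-variance
    # weighted portfolio returns of the cluster; CDaR looks at the tail of the drawdown
    # distribution rather than the return distribution, which is what sets it apart
    # from expected shortfall.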

    def _recursive_bisection(self,
                             expected_asset_returns,
                             asset_returns,
                             covariance_matrix,
                             assets,
                             allocation_metric,
                             confidence_level):
        # pylint: disable=bad-continuation, too-many-locals
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance_matrix: (pd.DataFrame) the covariance matrix
        :param assets: (list) list of asset names in the portfolio
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha)
        """

        self.weights = pd.Series(1, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            clustered_alphas = [cluster[start:end]
                                for cluster in clustered_alphas
                                for start, end in ((0, len(cluster) // 2), (len(cluster) // 2, len(cluster)))
                                if len(cluster) > 1]

            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Calculate allocation factor based on the metric
                if allocation_metric == 'minimum_variance':
                    left_cluster_variance = self._get_cluster_variance(covariance_matrix, left_cluster)
                    right_cluster_variance = self._get_cluster_variance(covariance_matrix, right_cluster)
                    alloc_factor = 1 - left_cluster_variance / (left_cluster_variance + right_cluster_variance)
                elif allocation_metric == 'minimum_standard_deviation':
                    left_cluster_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, left_cluster))
                    right_cluster_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, right_cluster))
                    alloc_factor = 1 - left_cluster_sd / (left_cluster_sd + right_cluster_sd)
                elif allocation_metric == 'sharpe_ratio':
                    left_cluster_sharpe_ratio = self._get_cluster_sharpe_ratio(expected_asset_returns,
                                                                               covariance_matrix,
                                                                               left_cluster)
                    right_cluster_sharpe_ratio = self._get_cluster_sharpe_ratio(expected_asset_returns,
                                                                                covariance_matrix,
                                                                                right_cluster)
                    alloc_factor = left_cluster_sharpe_ratio / (left_cluster_sharpe_ratio + right_cluster_sharpe_ratio)

                    # The Sharpe allocation factor can fall outside [0, 1] when a cluster
                    # Sharpe ratio is negative; fall back to a minimum-variance split then
                    if alloc_factor < 0 or alloc_factor > 1:
                        left_cluster_variance = self._get_cluster_variance(covariance_matrix, left_cluster)
                        right_cluster_variance = self._get_cluster_variance(covariance_matrix, right_cluster)
                        alloc_factor = 1 - left_cluster_variance / (left_cluster_variance + right_cluster_variance)
                elif allocation_metric == 'expected_shortfall':
                    left_cluster_expected_shortfall = self._get_cluster_expected_shortfall(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        confidence_level=confidence_level,
                        cluster_indices=left_cluster)
                    right_cluster_expected_shortfall = self._get_cluster_expected_shortfall(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        confidence_level=confidence_level,
                        cluster_indices=right_cluster)
                    alloc_factor = \
                        1 - left_cluster_expected_shortfall / (left_cluster_expected_shortfall + right_cluster_expected_shortfall)
                elif allocation_metric == 'conditional_drawdown_risk':
                    left_cluster_conditional_drawdown = self._get_cluster_conditional_drawdown_at_risk(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        confidence_level=confidence_level,
                        cluster_indices=left_cluster)
                    right_cluster_conditional_drawdown = self._get_cluster_conditional_drawdown_at_risk(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        confidence_level=confidence_level,
                        cluster_indices=right_cluster)
                    alloc_factor = \
                        1 - left_cluster_conditional_drawdown / (left_cluster_conditional_drawdown + right_cluster_conditional_drawdown)
                else:
                    alloc_factor = 0.5  # Equal weighting

                # Assign weights to each sub-cluster
                self.weights[left_cluster] *= alloc_factor
                self.weights[right_cluster] *= 1 - alloc_factor

        # Assign actual asset names to the weight index (cast to an array so the list of
        # names can be fancy-indexed by the ordered positions)
        self.weights.index = np.array(assets)[self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)
        self.weights = self.weights.T
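
    # Note: at each split the left cluster is scaled by alloc_factor and the right by
    # (1 - alloc_factor); since factors multiply down the tree, each asset's final weight
    # is the product of the split factors along its path from the root.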

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlation matrix from the covariance matrix of asset returns.

        :param covariance: (pd.DataFrame) asset returns covariances
        :return: (pd.DataFrame) correlations between asset returns
        """

        d_matrix = np.zeros_like(covariance)
        diagonal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagonal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr, index=covariance.columns, columns=covariance.columns)
        return corr

    @staticmethod
    def _perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric):
        # pylint: disable=bad-continuation
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of asset returns
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :return:
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError("You need to supply either raw prices or returns or a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if allocation_metric not in \
                {'minimum_variance', 'minimum_standard_deviation', 'sharpe_ratio',
                 'equal_weighting', 'expected_shortfall', 'conditional_drawdown_risk'}:
            raise ValueError("Unknown allocation metric specified. Supported metrics are - minimum_variance, "
                             "minimum_standard_deviation, sharpe_ratio, equal_weighting, expected_shortfall, "
                             "conditional_drawdown_risk")

    def allocate(self,
                 asset_names,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 expected_asset_returns=None,
                 allocation_metric='equal_weighting',
                 confidence_level=0.05,
                 optimal_num_clusters=None,
                 resample_by=None):
        """
        Calculate asset allocations using the HCAA algorithm.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of asset returns
        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk
        :param optimal_num_clusters: (int) optimal number of clusters for hierarchical clustering
        :param resample_by: (str) specifies how to resample the prices - daily, weekly, monthly, etc. Defaults to
                                  None for no resampling
        """

        # Perform initial checks
        self._perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric)

        # Calculate the expected returns if the user does not supply any returns
        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None:
            if asset_prices is None:
                raise ValueError(
                    "Either provide pre-calculated expected returns or give raw asset prices for inbuilt returns calculation")

            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices,
                    resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices,
                    resample_by=resample_by)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None:
            asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices, resample_by=resample_by)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self._cov2corr(covariance=cov)

        # Calculate the optimal number of clusters using the Gap statistic
        if not optimal_num_clusters:
            optimal_num_clusters = self._get_optimal_number_of_clusters(correlation=corr, asset_returns=asset_returns)

        # Tree Clustering
        self.clusters = self._tree_clustering(correlation=corr, num_clusters=optimal_num_clusters)

        # Quasi-diagonalization
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagonalization(num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        self._recursive_bisection(expected_asset_returns=expected_asset_returns,
                                  asset_returns=asset_returns,
                                  covariance_matrix=cov,
                                  assets=asset_names,
                                  allocation_metric=allocation_metric,
                                  confidence_level=confidence_level)
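
A hedged end-to-end sketch of how this class might be driven. The tickers and price paths below are synthetic, and the example assumes the module-level imports (numpy, pandas and the estimator classes) that the class itself relies on:

import numpy as np
import pandas as pd

# Synthetic business-day close prices for three made-up tickers
dates = pd.date_range('2020-01-01', periods=500, freq='B')
rng = np.random.default_rng(42)
prices = pd.DataFrame(
    100 * np.exp(np.cumsum(rng.normal(0.0002, 0.01, size=(500, 3)), axis=0)),
    index=dates, columns=['AAA', 'BBB', 'CCC'])

herc = HierarchicalClusteringAssetAllocation(calculate_expected_returns='mean')
herc.allocate(asset_names=list(prices.columns),
              asset_prices=prices,
              allocation_metric='equal_weighting')
print(herc.weights)  # a single row of weights that sums to 1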