Beispiel #1
0
class MeanVarianceOptimisation:
    """
    This class implements some classic mean-variance optimisation techniques for calculating the efficient frontier solutions.
    With the help of quadratic optimisers, users can generate optimal portfolios for different objective functions. Currently
    solutions to the following portfolios can be generated:
        1. Inverse Variance
        2. Maximum Sharpe
        3. Minimum Volatility
        4. Efficient Risk
    """
    def __init__(self, calculate_expected_returns='mean'):
        """
        Create an optimiser that has not computed any allocation yet.

        :param calculate_expected_returns: (str) name of the estimator ``allocate`` uses when it has to derive expected
                                                 returns itself - "mean" or "exponential"
        """

        # Configuration
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()

        # Result slots, filled in by allocate()
        self.weights = []
        self.weight_bounds = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None

    def allocate(self,
                 asset_names,
                 asset_prices=None,
                 expected_asset_returns=None,
                 covariance_matrix=None,
                 solution='inverse_variance',
                 risk_free_rate=0.05,
                 target_return=0.2,
                 weight_bounds=(0, 1),
                 resample_by=None):
        # pylint: disable=invalid-name, too-many-branches, bad-continuation
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (daily close)
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns (sigma)
        :param solution: (str) the type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe and
                               efficient_risk
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param target_return: (float) target return of the portfolio
        :param weight_bounds: (dict/tuple) can be either a single tuple of upper and lower bounds
                                          for all portfolio weights or a dictionary mapping of individual asset indices
                                          to tuples of upper and lower bounds. Those indices which do not have any mapping
                                          will have a (0, 1) default bound.
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to
                                  None for no resampling
        """

        if asset_prices is None and expected_asset_returns is None and covariance_matrix is None:
            raise ValueError(
                "You need to supply either raw prices or expected returns "
                "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        # Weight bounds
        self.weight_bounds = weight_bounds

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )
        expected_asset_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices, resample_by=resample_by)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix,
                           index=asset_names,
                           columns=asset_names)

        if solution == 'inverse_variance':
            self.weights = self._inverse_variance(covariance=cov)
        elif solution == 'min_volatility':
            self.weights, self.portfolio_risk = self._min_volatility(
                covariance=cov, num_assets=len(asset_names))
        elif solution == 'max_sharpe':
            self.weights, self.portfolio_risk, self.portfolio_return = self._max_sharpe(
                covariance=cov,
                expected_returns=expected_asset_returns,
                risk_free_rate=risk_free_rate,
                num_assets=len(asset_names))
        elif solution == 'efficient_risk':
            self.weights, self.portfolio_risk, self.portfolio_return = self._min_volatility_for_target_return(
                covariance=cov,
                expected_returns=expected_asset_returns,
                target_return=target_return,
                num_assets=len(asset_names))
        else:
            raise ValueError(
                "Unknown solution string specified. Supported solutions - "
                "inverse_variance, min_volatility, max_sharpe and efficient_risk."
            )

        # Round weights which are very very small negative numbers (e.g. -4.7e-16) to 0
        negative_weight_indices = np.argwhere(self.weights < 0)
        self.weights[negative_weight_indices] = np.round(
            self.weights[negative_weight_indices], 3)

        # Calculate the portfolio risk and return if it has not been calculated
        if self.portfolio_risk is None:
            self.portfolio_risk = np.dot(self.weights,
                                         np.dot(cov.values, self.weights.T))
        if self.portfolio_return is None:
            self.portfolio_return = np.dot(self.weights,
                                           expected_asset_returns)
        self.portfolio_sharpe_ratio = (
            (self.portfolio_return - risk_free_rate) /
            (self.portfolio_risk**0.5))

        self.weights = pd.DataFrame(self.weights)
        self.weights.index = asset_names
        self.weights = self.weights.T

    @staticmethod
    def _inverse_variance(covariance):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :return: (np.array) array of portfolio weights
        """

        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        return ivp

    def _min_volatility(self, covariance, num_assets):
        """
        Solve for the fully invested portfolio with the smallest variance.

        The bounds in ``self.weight_bounds`` are applied when it is a tuple (same bounds for every asset) or a dict
        (per-index bounds, defaulting to (0, 1)); upper bounds are capped at 1.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float) portfolio weights and the square root of the minimised quadratic form
        :raises ValueError: if the solver does not produce a set of weights
        """

        # Decision variable, warm-started at the equally weighted portfolio
        weights = cp.Variable(num_assets)
        weights.value = np.array([1 / num_assets] * num_assets)
        risk = cp.quad_form(weights, covariance)

        # Budget constraint followed by the optional weight bounds
        constraints = [cp.sum(weights) == 1]
        bounds = self.weight_bounds
        if isinstance(bounds, tuple):
            constraints += [
                weights >= bounds[0],
                weights <= min(bounds[1], 1),
            ]
        if isinstance(bounds, dict):
            for position in range(num_assets):
                lower, upper = bounds.get(position, (0, 1))
                constraints += [
                    weights[position] >= lower,
                    weights[position] <= min(upper, 1),
                ]

        # Minimise the portfolio variance under those constraints
        problem = cp.Problem(objective=cp.Minimize(risk),
                             constraints=constraints)
        problem.solve(warm_start=True)

        solution = weights.value
        if solution is None:
            raise ValueError('No optimal set of weights found.')
        return solution, risk.value**0.5

    def _max_sharpe(self, covariance, expected_returns, risk_free_rate,
                    num_assets):
        # pylint: disable=invalid-name
        """
        Compute maximum Sharpe portfolio allocation.

        The problem is solved in the scaled variable y = kappa * w: the quadratic form y' * sigma * y is minimised
        subject to (mu - risk_free_rate)' * y == 1 and sum(y) == kappa, and the weights are recovered as y / kappa.
        The bounds in ``self.weight_bounds`` are scaled by kappa accordingly.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (np.array) column vector of mean stock returns (mu), shape (num_assets, 1)
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, portfolio volatility and return value
        :raises ValueError: if the solver does not produce a solution
        """

        y = cp.Variable(num_assets)
        y.value = np.array([1 / num_assets] * num_assets)
        kappa = cp.Variable(1)
        risk = cp.quad_form(y, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa, kappa >= 0
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                y >= kappa * self.weight_bounds[0],
                y <= kappa * self.weight_bounds[1]
            ])
        if isinstance(self.weight_bounds, dict):
            for asset_index in range(num_assets):
                lower_bound, upper_bound = self.weight_bounds.get(
                    asset_index, (0, 1))
                allocation_constraints.extend([
                    y[asset_index] >= kappa * lower_bound,
                    y[asset_index] <= kappa * upper_bound
                ])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')
        weights = y.value / kappa.value
        portfolio_return = (expected_returns.T @ weights)[0]

        # ``risk`` is the quadratic form of the scaled variable y, not of the weights, so the
        # portfolio volatility has to be evaluated on the rescaled weights.
        portfolio_volatility = np.sqrt(weights @ np.asarray(covariance) @ weights)
        return weights, portfolio_volatility, portfolio_return

    def _min_volatility_for_target_return(self, covariance, expected_returns,
                                          target_return, num_assets):
        """
        Solve for the fully invested, minimum variance portfolio whose expected return equals the target.

        The bounds in ``self.weight_bounds`` are applied when it is a tuple (same bounds for every asset) or a dict
        (per-index bounds, defaulting to (0, 1)); upper bounds are capped at 1.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (np.array) column vector of mean stock returns (mu)
        :param target_return: (float) target return of the portfolio
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, square root of the minimised quadratic form and the
                                          target return that was requested
        :raises ValueError: if the solver does not produce a set of weights
        """

        weights = cp.Variable(num_assets)
        risk = cp.quad_form(weights, covariance)

        # Budget and return constraints followed by the optional weight bounds
        constraints = [
            cp.sum(weights) == 1,
            (expected_returns.T @ weights)[0] == target_return,
        ]
        bounds = self.weight_bounds
        if isinstance(bounds, tuple):
            constraints += [
                weights >= bounds[0],
                weights <= min(bounds[1], 1),
            ]
        if isinstance(bounds, dict):
            for position in range(num_assets):
                lower, upper = bounds.get(position, (0, 1))
                constraints += [
                    weights[position] >= lower,
                    weights[position] <= min(upper, 1),
                ]

        # Minimise the portfolio variance under those constraints
        problem = cp.Problem(objective=cp.Minimize(risk),
                             constraints=constraints)
        problem.solve()

        solution = weights.value
        if solution is None:
            raise ValueError('No optimal set of weights found.')
        return solution, risk.value**0.5, target_return

    def plot_efficient_frontier(self,
                                covariance,
                                expected_asset_returns,
                                num_assets,
                                min_return=0,
                                max_return=0.4,
                                risk_free_rate=0.05):
        # pylint: disable=bad-continuation, broad-except
        """
        Plot the Markowitz efficient frontier.

        The frontier is traced by solving the minimum volatility problem for 100 evenly spaced target returns between
        ``min_return`` and ``max_return``. Target returns for which no portfolio exists are left out of the plot.
        The weight bounds currently stored in ``self.weight_bounds`` are applied by the solver helper.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param num_assets: (int) number of assets in the portfolio
        :param min_return: (float) minimum target return
        :param max_return: (float) maximum target return
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :return: the object returned by ``plt.scatter`` for the frontier points
        :raises ValueError: if none of the target returns is attainable
        """

        expected_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            try:
                _, volatility, _ = self._min_volatility_for_target_return(
                    covariance=covariance,
                    expected_returns=expected_returns,
                    target_return=portfolio_return,
                    num_assets=num_assets)
            except ValueError:
                # This target return is not attainable under the constraints; it is simply not on the frontier
                continue
            volatilities.append(volatility)
            returns.append(portfolio_return)
            # The helper already returns a volatility (a square root), so it is used as-is;
            # the tiny epsilon only guards against a division by zero.
            sharpe_ratios.append(
                (portfolio_return - risk_free_rate) / (volatility + 1e-16))

        if not volatilities:
            raise ValueError('No optimal set of weights found.')

        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))
        figure = plt.scatter(volatilities,
                             returns,
                             c=sharpe_ratios,
                             cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure
Beispiel #2
0
class HierarchicalClusteringAssetAllocation:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended components mentioned in the
    following papers: `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio (August 23,
    2018). <https://ssrn.com/abstract=3237540>`_; and `Raffinot, Thomas, Hierarchical Clustering Based Asset Allocation (May 2017)
    <https://ssrn.com/abstract=2840729>`_;

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for assigning weights, the HERC
    algorithm proposed by Raffinot, allows investors to use other risk metrics like Expected Shortfall, Sharpe Ratio and
    Conditional Drawdown. Furthermore, it is flexible enough to be easily extended to include custom risk measures of our own.
    """

    def __init__(self, calculate_expected_returns='mean'):
        """
        Create an allocator that has not computed any clustering or weights yet.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
                                                 Currently supports "mean" and "exponential"
        """

        # Configuration and helper objects
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()

        # Result slots, filled in while allocating
        self.weights = []
        self.clusters = None
        self.ordered_indices = None

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the (log) cluster inertia used by the Gap statistic.

        For every cluster the mean of the pairwise distances computed on that cluster's return columns is taken;
        the result is the logarithm of the sum of those means.

        :param labels: (np.array) cluster label of every asset (column)
        :param asset_returns: (np.array) 2-d array of historical asset returns, one column per asset
        :return: (float) log of the summed per-cluster dispersion
        """

        per_cluster_dispersion = []
        for label in np.unique(labels):
            # Boolean mask selects the columns (assets) assigned to this cluster
            cluster_returns = asset_returns[:, labels == label]
            per_cluster_dispersion.append(np.mean(pairwise_distances(cluster_returns)))
        return np.log(np.sum(per_cluster_dispersion))

    def _get_optimal_number_of_clusters(self,
                                        correlation,
                                        asset_returns,
                                        num_reference_datasets=5,
                                        max_number_of_clusters=10):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        For every candidate number of clusters k = 1..max_number_of_clusters the log-inertia of the real data is
        compared with the average log-inertia of uniformly random reference datasets of the same shape. The k with
        the largest gap is returned.

        :param correlation: (np.array) matrix of asset correlations
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param num_reference_datasets: (int) the number of reference datasets to generate for calculating expected inertia
        :param max_number_of_clusters: (int) the maximum number of clusters to check for finding the optimal value;
                                             capped at the number of assets
        :return: (int) the optimal number of clusters (at least 1)
        """

        cluster_func = AgglomerativeClustering(affinity='precomputed', linkage='single')
        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))

        # There cannot be more clusters than there are assets to cluster
        max_number_of_clusters = min(max_number_of_clusters, len(original_distance_matrix))

        gap_values = []
        for num_clusters in range(1, max_number_of_clusters + 1):
            cluster_func.n_clusters = num_clusters

            # Calculate expected inertia from reference datasets
            reference_inertias = []
            for _ in range(num_reference_datasets):

                # Generate reference returns from uniform distribution and calculate the distance matrix.
                reference_asset_returns = pd.DataFrame(np.random.rand(*asset_returns.shape))
                reference_correlation = np.array(reference_asset_returns.corr())
                reference_distance_matrix = np.sqrt(2 * (1 - reference_correlation).round(5))

                reference_cluster_assignments = cluster_func.fit_predict(reference_distance_matrix)
                inertia = self._compute_cluster_inertia(reference_cluster_assignments, reference_asset_returns.values)
                reference_inertias.append(inertia)
            expected_inertia = np.mean(reference_inertias)

            # Calculate inertia from original data
            original_cluster_assignments = cluster_func.fit_predict(original_distance_matrix)
            inertia = self._compute_cluster_inertia(original_cluster_assignments, asset_returns.values)

            # Calculate the gap statistic
            gap_values.append(expected_inertia - inertia)

        # gap_values[i] belongs to i + 1 clusters, so shift the 0-based argmax back to a cluster count
        return int(np.argmax(gap_values)) + 1

    @staticmethod
    def _tree_clustering(correlation, num_clusters):
        """
        Run single-linkage agglomerative clustering on the correlation based distances between assets.

        :param correlation: (np.array) matrix of asset correlations
        :param num_clusters: (int) the number of clusters
        :return: (np.array) the ``children_`` attribute of the fitted clustering, i.e. the merge tree
        """

        # Correlation -> distance: d = sqrt(2 * (1 - rho)), rounded to avoid tiny negative values under the root
        distances = np.sqrt(2 * (1 - correlation).round(5))

        clustering = AgglomerativeClustering(n_clusters=num_clusters,
                                             affinity='precomputed',
                                             linkage='single')
        clustering.fit(distances)
        return clustering.children_

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) the total number of assets
        :param curr_index: (int) current index
        :return: (list) the assets rearranged according to hierarchical clustering
        """

        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) + self._quasi_diagnalization(num_assets, right))

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        '''
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :return: (list) inverse variance weight values
        '''

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) variance of the cluster
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _get_cluster_sharpe_ratio(self, expected_asset_returns, covariance, cluster_indices):
        """
        Calculate cluster Sharpe Ratio.

        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) sharpe ratio of the cluster
        """

        cluster_expected_returns = expected_asset_returns[cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        cluster_sharpe_ratio = (parity_w @ cluster_expected_returns) / np.sqrt(cluster_variance)
        return cluster_sharpe_ratio

    def _get_cluster_expected_shortfall(self, asset_returns, covariance, confidence_level, cluster_indices):
        """
        Calculate cluster expected shortfall.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) expected shortfall of the cluster
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_expected_shortfall = self.risk_metrics.calculate_expected_shortfall(returns=portfolio_returns,
                                                                                    confidence_level=confidence_level)
        return cluster_expected_shortfall

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns, covariance, confidence_level, cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) CDD of the cluster
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_conditional_drawdown = self.risk_metrics.calculate_conditional_drawdown_risk(returns=portfolio_returns,
                                                                                             confidence_level=confidence_level)
        return cluster_conditional_drawdown

    def _recursive_bisection(self,
                             expected_asset_returns,
                             asset_returns,
                             covariance_matrix,
                             assets,
                             allocation_metric,
                             confidence_level):
        # pylint: disable=bad-continuation, too-many-locals
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance_matrix: (pd.DataFrame) the covariance matrix
        :param assets: (list) list of asset names in the portfolio
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha)
        """

        self.weights = pd.Series(1, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            clustered_alphas = [cluster[start:end]
                                for cluster in clustered_alphas
                                for start, end in ((0, len(cluster) // 2), (len(cluster) // 2, len(cluster)))
                                if len(cluster) > 1]

            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Calculate allocation factor based on the metric
                if allocation_metric == 'minimum_variance':
                    left_cluster_variance = self._get_cluster_variance(covariance_matrix, left_cluster)
                    right_cluster_variance = self._get_cluster_variance(covariance_matrix, right_cluster)
                    alloc_factor = 1 - left_cluster_variance / (left_cluster_variance + right_cluster_variance)
                elif allocation_metric == 'minimum_standard_deviation':
                    left_cluster_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, left_cluster))
                    right_cluster_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, right_cluster))
                    alloc_factor = 1 - left_cluster_sd / (left_cluster_sd + right_cluster_sd)
                elif allocation_metric == 'sharpe_ratio':
                    left_cluster_sharpe_ratio = self._get_cluster_sharpe_ratio(expected_asset_returns,
                                                                               covariance_matrix,
                                                                               left_cluster)
                    right_cluster_sharpe_ratio = self._get_cluster_sharpe_ratio(expected_asset_returns,
                                                                                covariance_matrix,
                                                                                right_cluster)
                    alloc_factor = left_cluster_sharpe_ratio / (left_cluster_sharpe_ratio + right_cluster_sharpe_ratio)

                    if alloc_factor < 0 or alloc_factor > 1:
                        left_cluster_variance = self._get_cluster_variance(covariance_matrix, left_cluster)
                        right_cluster_variance = self._get_cluster_variance(covariance_matrix, right_cluster)
                        alloc_factor = 1 - left_cluster_variance / (left_cluster_variance + right_cluster_variance)
                elif allocation_metric == 'expected_shortfall':
                    left_cluster_expected_shortfall = self._get_cluster_expected_shortfall(asset_returns=asset_returns,
                                                                                           covariance=covariance_matrix,
                                                                                           confidence_level=confidence_level,
                                                                                           cluster_indices=left_cluster)
                    right_cluster_expected_shortfall = self._get_cluster_expected_shortfall(asset_returns=asset_returns,
                                                                                           covariance=covariance_matrix,
                                                                                           confidence_level=confidence_level,
                                                                                           cluster_indices=right_cluster)
                    alloc_factor = \
                        1 - left_cluster_expected_shortfall / (left_cluster_expected_shortfall + right_cluster_expected_shortfall)
                elif allocation_metric == 'conditional_drawdown_risk':
                    left_cluster_conditional_drawdown = self._get_cluster_conditional_drawdown_at_risk(asset_returns=asset_returns,
                                                         covariance=covariance_matrix,
                                                         confidence_level=confidence_level,
                                                         cluster_indices=left_cluster)
                    right_cluster_conditional_drawdown = self._get_cluster_conditional_drawdown_at_risk(asset_returns=asset_returns,
                                                         covariance=covariance_matrix,
                                                         confidence_level=confidence_level,
                                                         cluster_indices=right_cluster)
                    alloc_factor = \
                        1 - left_cluster_conditional_drawdown / (left_cluster_conditional_drawdown + right_cluster_conditional_drawdown)
                else:
                    alloc_factor = 0.5 # equal weighting

                # Assign weights to each sub-cluster
                self.weights[left_cluster] *= alloc_factor
                self.weights[right_cluster] *= 1 - alloc_factor

        # Assign actual asset values to weight index
        self.weights.index = assets[self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)
        self.weights = self.weights.T

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.DataFrame) asset returns covariances
        :return: (pd.DataFrame) correlations between asset returns
        """

        d_matrix = np.zeros_like(covariance)
        diagnoal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagnoal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr, index=covariance.columns, columns=covariance.columns)
        return corr

    @staticmethod
    def _perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric):
        # pylint: disable=bad-continuation
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of asset returns
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :return:
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError("You need to supply either raw prices or returns or a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if allocation_metric not in \
                {'minimum_variance', 'minimum_standard_deviation', 'sharpe_ratio',
                 'equal_weighting', 'expected_shortfall', 'conditional_drawdown_risk'}:
            raise ValueError("Unknown allocation metric specified. Supported metrics are - minimum_variance, "
                             "minimum_standard_deviation, sharpe_ratio, equal_weighting, expected_shortfall, "
                             "conditional_drawdown_risk")

    def allocate(self,
                 asset_names,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 expected_asset_returns=None,
                 allocation_metric='equal_weighting',
                 confidence_level=0.05,
                 optimal_num_clusters=None,
                 resample_by=None):
        """
        Calculate asset allocations using the HCAA algorithm.

        The inputs are validated, the missing estimators (expected returns, returns, covariance) are derived,
        the correlation matrix is clustered, the assets are re-ordered and finally the weights are assigned by
        recursive bisection. The outcome is stored on the instance (``self.clusters``, ``self.ordered_indices``
        and the weights written by ``_recursive_bisection``); nothing is returned.

        :param asset_names: (list/np.array/pd.Index) the asset names, one per column of the data
        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of asset returns
        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk
        :param optimal_num_clusters: (int) optimal number of clusters for hierarchical clustering; when falsy it
                                           is estimated from the data
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to
                                  None for no resampling
        :raises ValueError: if the inputs fail the initial checks, if expected returns are needed for the
                            'sharpe_ratio' metric but neither they nor prices are supplied, or if the configured
                            expected-returns method is unknown
        """

        # Perform initial checks
        self._perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric)

        # Calculate the expected returns if the user does not supply any returns
        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None:
            if asset_prices is None:
                raise ValueError(
                    "Either provide pre-calculated expected returns or give raw asset prices for inbuilt returns calculation")

            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices,
                    resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices,
                    resample_by=resample_by)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")

        # Calculate the returns if the user does not supply a returns dataframe
        # NOTE(review): the initial checks accept a covariance matrix on its own, in which case asset_prices is
        # None here - confirm how calculate_returns behaves for that input.
        if asset_returns is None:
            asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices, resample_by=resample_by)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self._cov2corr(covariance=cov)

        # Calculate the optimal number of clusters using the Gap statistic
        if not optimal_num_clusters:
            optimal_num_clusters = self._get_optimal_number_of_clusters(correlation=corr, asset_returns=asset_returns)

        # Tree Clustering
        self.clusters = self._tree_clustering(correlation=corr, num_clusters=optimal_num_clusters)

        # Quasi Diagnalization
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        # The bisection step labels the final weights with `assets[self.ordered_indices]`, i.e. it indexes the
        # names with a whole collection of positions. A plain Python list does not support that, so the names
        # are wrapped in a pd.Index, which does; array-like and Index inputs keep working as before.
        self._recursive_bisection(expected_asset_returns=expected_asset_returns,
                                  asset_returns=asset_returns,
                                  covariance_matrix=cov,
                                  assets=pd.Index(asset_names),
                                  allocation_metric=allocation_metric,
                                  confidence_level=confidence_level)
Beispiel #3
0
class MeanVarianceOptimisation:
    # pylint: disable=too-many-instance-attributes
    """
    This class implements some classic mean-variance optimisation techniques for calculating the efficient frontier solutions.
    With the help of quadratic optimisers, users can generate optimal portfolios for different objective functions. Currently
    solutions to the following portfolios can be generated:
        1. Inverse Variance
        2. Maximum Sharpe
        3. Minimum Volatility
        4. Efficient Risk
        5. Maximum Return - Minimum Volatility
        6. Efficient Return
        7. Maximum Diversification
        8. Maximum Decorrelation
        9. Custom Objective Function
    """

    def __init__(self, calculate_expected_returns='mean', risk_free_rate=0.03):
        """
        Set up an optimiser with default settings and empty results.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports "mean" and "exponential".
        :param risk_free_rate: (float) The risk-free rate, stored on the instance and subtracted from the
                                       portfolio return when the Sharpe ratio is computed.
        """

        # Results, filled in by the allocation methods
        self.weights = []
        self.asset_names = None
        self.num_assets = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None

        # Helper estimators
        self.returns_estimator = ReturnsEstimation()
        self.risk_estimators = RiskEstimators()

        # Configuration; the default bounds keep every weight between 0 and 1
        self.calculate_expected_returns = calculate_expected_returns
        self.risk_free_rate = risk_free_rate
        self.weight_bounds = (0, 1)

    def allocate(self, asset_names=None, asset_prices=None, expected_asset_returns=None, covariance_matrix=None,
                 solution='inverse_variance', target_return=0.2, target_risk=0.01, risk_aversion=10, weight_bounds=None):
        # pylint: disable=invalid-name, too-many-branches
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe,
                               efficient_risk, max_return_min_volatility, max_diversification, efficient_return
                               and max_decorrelation.
        :param target_return: (float) Target return of the portfolio.
        :param target_risk: (float) Target risk of the portfolio.
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa.
        :param weight_bounds: (dict/tuple) Can be either a single tuple of upper and lower bounds
                                           for all portfolio weights or a list of strings with each string representing
                                           an inequality on the weights. For e.g. to bound the weight of the 3rd asset
                                           pass the following weight bounds: ['weights[2] <= 0.3', 'weights[2] >= 0.1'].
        """

        self._error_checks(asset_names, asset_prices, expected_asset_returns, covariance_matrix, solution)

        # Weight bounds
        if weight_bounds is not None:
            self.weight_bounds = weight_bounds

        # Calculate the expected asset returns and covariance matrix if not given by the user
        expected_asset_returns, cov = self._calculate_estimators(asset_prices,
                                                                 expected_asset_returns,
                                                                 covariance_matrix)

        if solution == 'inverse_variance':
            self._inverse_variance(covariance=cov, expected_returns=expected_asset_returns)
        elif solution == 'min_volatility':
            self._min_volatility(covariance=cov,
                                 expected_returns=expected_asset_returns)
        elif solution == 'max_return_min_volatility':
            self._max_return_min_volatility(covariance=cov,
                                            expected_returns=expected_asset_returns,
                                            risk_aversion=risk_aversion)
        elif solution == 'max_sharpe':
            self._max_sharpe(covariance=cov,
                             expected_returns=expected_asset_returns)
        elif solution == 'efficient_risk':
            self._min_volatility_for_target_return(covariance=cov,
                                                   expected_returns=expected_asset_returns,
                                                   target_return=target_return)
        elif solution == 'efficient_return':
            self._max_return_for_target_risk(covariance=cov,
                                             expected_returns=expected_asset_returns,
                                             target_risk=target_risk)
        elif solution == 'max_diversification':
            self._max_diversification(covariance=cov,
                                      expected_returns=expected_asset_returns)
        else:
            self._max_decorrelation(covariance=cov,
                                    expected_returns=expected_asset_returns)

        # Calculate the portfolio sharpe ratio
        self.portfolio_sharpe_ratio = ((self.portfolio_return - self.risk_free_rate) / (self.portfolio_risk ** 0.5))

        # Do some post-processing of the weights
        self._post_process_weights()

    def allocate_custom_objective(self, custom_objective, asset_names=None, asset_prices=None, expected_asset_returns=None,
                                  covariance_matrix=None, target_return=0.2, target_risk=0.01, risk_aversion=10):
        # pylint: disable=eval-used, too-many-locals
        """
        Create a portfolio using custom objective and constraints.

        The objective and constraint strings are evaluated with ``eval`` inside this method, so they can refer to
        the names that are local to it: ``weights``, ``risk``, ``portfolio_return``, ``expected_asset_returns``,
        ``cov``, ``target_return``, ``target_risk`` and ``risk_aversion`` (as well as ``cp`` and ``np``). The
        results are stored on the instance (weights, portfolio risk, portfolio return and Sharpe ratio).

        SECURITY: because the strings are passed to ``eval``, they must come from a trusted source.

        :param custom_objective: (dict) A custom objective function with custom constraints. You need to write it in the form
                                        expected by cvxpy. The objective will be a single string while the constraints can be a
                                        list of strings specifying the constraints. For e.g. {'objective': 'cp.Maximize(
                                        expected_asset_returns)', 'constraints': ['weights >= 0', 'weights <= 1']}.
        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param target_return: (float) Target return of the portfolio. Not used directly; only available to the
                                      evaluated objective/constraint strings.
        :param target_risk: (float) Target risk of the portfolio. Not used directly; only available to the
                                    evaluated objective/constraint strings.
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa. Not used directly; only available to the
                                      evaluated objective/constraint strings.
        :raises ValueError: If the inputs fail the initial error checks or the solver leaves the weights unset.
        """

        # No solution string is passed here, so the solution-name validation is skipped
        self._error_checks(asset_names, asset_prices, expected_asset_returns, covariance_matrix)

        # Calculate the expected asset returns and covariance matrix if not given by the user
        expected_asset_returns, cov = self._calculate_estimators(asset_prices,
                                                                 expected_asset_returns,
                                                                 covariance_matrix)

        # Optimisation variable, seeded with equal weights
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        # Quadratic form of the weights with the covariance matrix; its square root is used for the Sharpe ratio below
        risk = cp.quad_form(weights, cov)
        portfolio_return = cp.matmul(weights, expected_asset_returns)

        # Optimisation objective and constraints
        # The strings are evaluated in this function's scope, which is why the local names above must keep their
        # exact spelling and why the constraints are evaluated in a plain loop.
        objective, constraints = custom_objective['objective'], custom_objective['constraints']
        allocation_objective = eval(objective)
        allocation_constraints = []
        for constraint in constraints:
            allocation_constraints.append(eval(constraint))

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

        # Calculate the portfolio sharpe ratio
        self.portfolio_sharpe_ratio = ((self.portfolio_return - self.risk_free_rate) / (self.portfolio_risk ** 0.5))

        # Do some post-processing of the weights
        self._post_process_weights()

    def get_portfolio_metrics(self):
        """
        Prints the portfolio metrics - return, risk and Sharpe Ratio.
        """

        print("Portfolio Return = %s" % self.portfolio_return)
        print("Portfolio Risk = %s" % self.portfolio_risk)
        print("Portfolio Sharpe Ratio = %s" % self.portfolio_risk)

    def plot_efficient_frontier(self, covariance, expected_asset_returns, min_return=0, max_return=0.4,
                                risk_free_rate=0.05):
        # pylint: disable=broad-except
        """
        Plot the Markowitz efficient frontier.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param min_return: (float) Minimum target return.
        :param max_return: (float) Maximum target return.
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        """

        expected_returns = np.array(expected_asset_returns).reshape((len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            try:
                self.allocate(covariance_matrix=covariance,
                              expected_asset_returns=expected_returns,
                              solution='efficient_risk',
                              target_return=portfolio_return)
                volatilities.append(self.portfolio_risk)
                returns.append(portfolio_return)
                sharpe_ratios.append((portfolio_return - risk_free_rate) / (self.portfolio_risk ** 0.5 + 1e-16))
            except Exception:
                continue
        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))
        figure = plt.scatter(volatilities, returns, c=sharpe_ratios, cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure

    def _error_checks(self, asset_names, asset_prices, expected_asset_returns, covariance_matrix, solution=None):
        """
        Some initial error checks on the inputs.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe,
                               efficient_risk, max_return_min_volatility, max_diversification, efficient_return
                               and max_decorrelation.
        """

        if asset_prices is None and (expected_asset_returns is None or covariance_matrix is None):
            raise ValueError("You need to supply either raw prices or expected returns "
                             "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if solution is not None and solution not in {"inverse_variance", "min_volatility", "max_sharpe", "efficient_risk",
                                                     "max_return_min_volatility", "max_diversification", "efficient_return",
                                                     "max_decorrelation"}:
            raise ValueError("Unknown solution string specified. Supported solutions - "
                             "inverse_variance, min_volatility, max_sharpe, efficient_risk"
                             "max_return_min_volatility, max_diversification, efficient_return and max_decorrelation")

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif covariance_matrix is not None and isinstance(covariance_matrix, pd.DataFrame):
                asset_names = covariance_matrix.columns
            else:
                raise ValueError("Please provide a list of asset names")
        self.asset_names = asset_names
        self.num_assets = len(asset_names)

    def _calculate_estimators(self, asset_prices, expected_asset_returns, covariance_matrix):
        """
        Calculate the expected returns and covariance matrix of assets in the portfolio.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :return: (np.array, pd.DataFrame) Expected asset returns and covariance matrix.
        """

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")
        expected_asset_returns = np.array(expected_asset_returns).reshape((len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=self.asset_names, columns=self.asset_names)

        return expected_asset_returns, cov

    def _post_process_weights(self):
        """
        Check weights for very small numbers and numbers close to 1. A final post-processing of weights produced by the
        optimisation procedures.
        """

        # Round weights which are very very small negative numbers (e.g. -4.7e-16) to 0
        self.weights[self.weights < 0] = 0

        # If any of the weights is very close to one, we convert it to 1 and set the other asset weights to 0.
        if True in set(np.isclose(self.weights, 1)):
            almost_one_index = np.isclose(self.weights, 1)
            self.weights[almost_one_index] = 1
            self.weights[np.logical_not(almost_one_index)] = 0

        self.weights = pd.DataFrame(self.weights)
        self.weights.index = self.asset_names
        self.weights = self.weights.T

    def _inverse_variance(self, covariance, expected_returns):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        self.weights = ivp
        self.portfolio_risk = np.dot(self.weights, np.dot(covariance.values, self.weights.T))
        self.portfolio_return = np.dot(self.weights, expected_returns)[0]

    def _min_volatility(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Compute minimum volatility portfolio allocation.

        Minimises the quadratic form of the weights with the covariance matrix subject to the weights summing to
        one and to the configured weight bounds. The solved weights, risk and return are stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If the solver leaves the weights unset.
        """

        # Optimisation variable, seeded with equal weights
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
        ]
        if isinstance(self.weight_bounds, tuple):
            # A single (lower, upper) pair applies to all weights; the upper bound is capped at 1
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # Each entry is an inequality string that is evaluated in this scope, so it can refer to the local
            # name `weights`. The strings must come from a trusted source.
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_return_min_volatility(self, covariance, expected_returns, risk_aversion):
        # pylint: disable=eval-used
        """
        Calculate maximum return-minimum volatility portfolio allocation.

        Minimises ``risk_aversion * risk - portfolio_return`` subject to the weights summing to one and to the
        configured weight bounds. The solved weights, risk and return are stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                           more risk averse and vice-versa.
        :raises ValueError: If the solver leaves the weights unset.
        """

        # Optimisation variable, seeded with equal weights
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk_aversion * risk - portfolio_return)
        allocation_constraints = [
            cp.sum(weights) == 1
        ]
        if isinstance(self.weight_bounds, tuple):
            # A single (lower, upper) pair applies to all weights; the upper bound is capped at 1
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # Each entry is an inequality string that is evaluated in this scope, so it can refer to the local
            # name `weights`. The strings must come from a trusted source.
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_sharpe(self, covariance, expected_returns):
        # pylint: disable=invalid-name, eval-used
        """
        Compute maximum Sharpe portfolio allocation.

        The problem is solved in the auxiliary variables ``y`` and ``kappa``: the quadratic form of ``y`` with the
        covariance matrix is minimised while the excess return (over ``self.risk_free_rate``) of ``y`` is fixed to
        one and ``y`` sums to ``kappa``. The portfolio weights are then taken as ``y / kappa``. The solved
        weights, risk and return are stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If the solver leaves ``y`` or ``kappa`` unset.
        """

        # Auxiliary variable, seeded with equal values
        y = cp.Variable(self.num_assets)
        y.value = np.array([1 / self.num_assets] * self.num_assets)
        kappa = cp.Variable(1)
        # Note that the stored risk is the quadratic form of y, not of the rescaled weights
        risk = cp.quad_form(y, covariance)
        weights = y / kappa
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - self.risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa,
            kappa >= 0
        ]
        if isinstance(self.weight_bounds, tuple):
            # The (lower, upper) pair is scaled by kappa because the bounds are expressed on y, not on the weights
            allocation_constraints.extend(
                [
                    y >= kappa * self.weight_bounds[0],
                    y <= kappa * self.weight_bounds[1]
                ]
            )
        else:
            # Each entry is an inequality string that is evaluated in this scope. The strings must come from a
            # trusted source.
            # NOTE(review): an inequality written in terms of `weights` refers to the expression `y / kappa`
            # here rather than to a plain variable - confirm the solver accepts such constraints.
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    y <= kappa,
                    y >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _min_volatility_for_target_return(self, covariance, expected_returns, target_return):
        # pylint: disable=eval-used
        """
        Calculate minimum volatility portfolio for a given target return.

        Minimises the quadratic form of the weights with the covariance matrix subject to the weights summing to
        one, the portfolio return being at least ``target_return`` and the configured weight bounds. The solved
        weights and risk are stored on the instance; the stored portfolio return is the requested target itself.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_return: (float) Target return of the portfolio.
        :raises ValueError: If the solver leaves the weights unset.
        """

        # Optimisation variable, seeded with equal weights
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
            portfolio_return >= target_return,
        ]
        if isinstance(self.weight_bounds, tuple):
            # A single (lower, upper) pair applies to all weights; the upper bound is capped at 1
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # Each entry is an inequality string that is evaluated in this scope, so it can refer to the local
            # name `weights`. The strings must come from a trusted source.
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        # The target is recorded rather than the solved return, although the constraint above only requires the
        # return to be at least the target
        self.portfolio_return = target_return

    def _max_return_for_target_risk(self, covariance, expected_returns, target_risk):
        # pylint: disable=eval-used
        """
        Solve for the highest-return allocation whose variance term does not exceed ``target_risk``.

        Results are stored on the instance: ``weights`` and ``portfolio_return`` are read back from the solved
        problem, while ``portfolio_risk`` is set to the requested ``target_risk``.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_risk: (float) Target risk of the portfolio.
        :raises ValueError: if the solver leaves the weight variable without a value.
        """

        # The locals ``weights``, ``portfolio_return`` and ``risk`` are visible to the eval'd
        # constraint strings further down, so their names must not change.
        asset_count = self.num_assets
        weights = cp.Variable(asset_count)
        weights.value = np.array([1 / asset_count] * asset_count)  # equal-weight starting point
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Objective: maximise return subject to full investment and the risk ceiling
        objective = cp.Maximize(portfolio_return)
        constraints = [cp.sum(weights) == 1, risk <= target_risk]

        bounds = self.weight_bounds
        if not isinstance(bounds, tuple):
            # Anything that is not a tuple is treated as an iterable of inequality strings.
            # NOTE(review): these strings are executed with eval(); only pass trusted input.
            for inequality in bounds:
                constraints.append(eval(inequality))

            # Hard boundaries that always apply on top of the custom inequalities
            constraints += [weights <= 1, weights >= 0]
        else:
            # (lower, upper) tuple; the upper bound is capped at 1
            constraints += [weights >= bounds[0], weights <= min(bounds[1], 1)]

        # Build and solve the optimisation problem
        solver_problem = cp.Problem(objective=objective, constraints=constraints)
        solver_problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights, self.portfolio_risk = weights.value, target_risk
        self.portfolio_return = portfolio_return.value[0]

    def _max_diversification(self, covariance, expected_returns):
        """
        Calculate the maximum diversified portfolio.

        The maximum decorrelation problem is solved first; its weights are then rescaled by the inverse of each
        asset's volatility (standard deviation) and renormalised so that they sum to one. The resulting weights,
        portfolio return and portfolio risk are stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        self._max_decorrelation(covariance, expected_returns)

        # Divide weights by individual asset volatilities. The diagonal of the covariance matrix holds the
        # variances, so take the square root first (the previous code divided by the variances).
        asset_volatilities = np.diag(covariance) ** 0.5
        self.weights /= asset_volatilities

        # Standardize weights so that they sum to one again
        self.weights /= np.sum(self.weights)

        # Recompute the portfolio statistics for the rescaled weights
        self.portfolio_risk = np.dot(self.weights, np.dot(covariance, self.weights.T))
        self.portfolio_return = np.dot(expected_returns.T, self.weights)[0]

    def _max_decorrelation(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Solve for the allocation that minimises the correlation-weighted quadratic form of the weights.

        The covariance matrix is converted to a correlation matrix through ``self.risk_estimators.cov_to_corr``
        and ``weights' * corr * weights`` is minimised. Results (weights, portfolio risk and portfolio return)
        are stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: if the solver leaves the weight variable without a value.
        """

        # The locals defined here (``weights`` in particular) are visible to the eval'd constraint
        # strings further down, so their names must not change.
        asset_count = self.num_assets
        weights = cp.Variable(asset_count)
        weights.value = np.array([1 / asset_count] * asset_count)  # equal-weight starting point
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)
        corr = self.risk_estimators.cov_to_corr(covariance)
        portfolio_correlation = cp.quad_form(weights, corr)

        # Objective: minimise portfolio correlation subject to full investment
        objective = cp.Minimize(portfolio_correlation)
        constraints = [cp.sum(weights) == 1]

        bounds = self.weight_bounds
        if not isinstance(bounds, tuple):
            # Anything that is not a tuple is treated as an iterable of inequality strings.
            # NOTE(review): these strings are executed with eval(); only pass trusted input.
            for inequality in bounds:
                constraints.append(eval(inequality))

            # Hard boundaries that always apply on top of the custom inequalities
            constraints += [weights <= 1, weights >= 0]
        else:
            # (lower, upper) tuple; the upper bound is capped at 1
            constraints += [weights >= bounds[0], weights <= min(bounds[1], 1)]

        # Build and solve the optimisation problem, reusing the initial guess set above
        solver_problem = cp.Problem(objective=objective, constraints=constraints)
        solver_problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights, self.portfolio_risk = weights.value, risk.value
        self.portfolio_return = portfolio_return.value[0]
Beispiel #4
0
class CLA:
    # pylint: disable=too-many-instance-attributes
    """
    This class implements the Critical Line Algorithm for mean-variance portfolio optimisation. It is reproduced with
    modification from the following paper: `D.H. Bailey and M.L. Prado "An Open-Source Implementation of the Critical-Line
    Algorithm for Portfolio Optimization", Algorithms, 6 (2013), 169-196. <http://dx.doi.org/10.3390/a6010169>`_.

    The Critical Line Algorithm is a portfolio optimisation algorithm used for calculating the optimal allocation weights
    for a given portfolio. It solves the optimisation problem with constraints on each weight - lower and upper
    bounds on the weight value. This class can compute multiple types of solutions -
            1. CLA Turning Points
            2. Minimum Variance
            3. Maximum Sharpe
            4. Efficient Frontier Allocations
    """
    def __init__(self,
                 weight_bounds=(0, 1),
                 calculate_expected_returns="mean"):
        """
        Initialise the storage arrays and some preprocessing.

        :param weight_bounds: (tuple) a tuple specifying the lower and upper bound ranges for the portfolio weights.
                                      Each entry is either a single number applied to every asset or a sequence with
                                      one value per asset
        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
                                        Currently supports "mean" and "exponential"
        """

        self.weight_bounds = weight_bounds
        self.calculate_expected_returns = calculate_expected_returns

        # Per-turning-point storage; the four lists are always kept the same length
        self.weights = []
        self.lambdas = []
        self.gammas = []
        self.free_weights = []

        # Problem data, filled in by _initialise()
        self.expected_returns = None
        self.cov_matrix = None
        self.lower_bounds = None
        self.upper_bounds = None

        # Results, filled in by _compute_solution()
        self.max_sharpe = None
        self.min_var = None
        self.efficient_frontier_means = None
        self.efficient_frontier_sigma = None
        self.returns_estimator = ReturnsEstimation()

    @staticmethod
    def _infnone(number):
        """
        Converts a Nonetype object to -inf.

        :param number: (int/float/None) a number
        :return: (float) -inf or number
        """
        return float("-inf") if number is None else number

    @staticmethod
    def _as_float(value):
        """
        Convert a scalar or a single-element array to a Python float.

        ``float()`` on an array with one or more dimensions is deprecated by NumPy, so the single element is
        extracted explicitly instead.

        :param value: (float/np.array) a scalar or an array holding exactly one element
        :return: (float) the value as a Python float
        """
        return float(np.asarray(value).item())

    @staticmethod
    def _expand_bound(bound, shape):
        """
        Turn one entry of ``weight_bounds`` into a float column vector of per-asset bounds.

        :param bound: (number/list/np.array) a single bound for every asset or one bound per asset
        :param shape: (tuple) target shape, (number of assets, 1)
        :return: (np.array) float array of the requested shape
        """
        if isinstance(bound, numbers.Real):
            return np.ones(shape) * bound

        # Force a float dtype: an integer array here would later truncate the computed weights
        return np.array(bound, dtype=float).reshape(shape)

    def _drop_turning_point(self, index):
        """
        Remove the turning point at ``index`` from all four storage lists.

        :param index: (int) position of the turning point to remove
        """
        del self.weights[index]
        del self.lambdas[index]
        del self.gammas[index]
        del self.free_weights[index]

    def _init_algo(self):
        """
        Initial setting up of the algorithm. Calculates the first free weight of the first turning point.

        :return: (list, np.array) single-element list with the index of the first free asset and the weight vector
        """

        num_assets = self.expected_returns.shape[0]

        # Pair each asset index with its expected return so the sort below keeps track of positions
        structured_array = np.zeros(num_assets, dtype=[("id", int), ("mu", float)])
        structured_array[:] = list(zip(range(num_assets), self.expected_returns[:, 0]))

        # Sort based on increasing return value
        sorted_assets = np.sort(structured_array, order="mu")

        # Starting from the lower bounds, push assets to their upper bound one by one (highest
        # expected return first) until the weights add up to at least one
        index, weights = num_assets, np.copy(self.lower_bounds)
        while np.sum(weights) < 1:
            index -= 1
            asset = sorted_assets[index][0]
            weights[asset] = self.upper_bounds[asset]

        # The last asset touched absorbs the excess so the weights sum to exactly one; it is the first free weight
        first_free = sorted_assets[index][0]
        weights[first_free] += 1 - np.sum(weights)
        return [first_free], weights

    @staticmethod
    def _compute_bi(c_final, asset_bounds_i):
        """
        Calculates which bound value to assign to a bounded asset - lower bound or upper bound.

        :param c_final: (float) a value calculated using the covariance matrices of free weights.
                          Refer to https://pdfs.semanticscholar.org/4fb1/2c1129ba5389bafe47b03e595d098d0252b9.pdf for
                          more information.
        :param asset_bounds_i: (list) a list containing the lower and upper bound values for the ith weight
        :return: bounded weight value - the upper bound when ``c_final`` is positive, otherwise the lower bound
        """

        if c_final > 0:
            return asset_bounds_i[1][0]
        return asset_bounds_i[0][0]

    def _compute_w(self, covar_f_inv, covar_fb, mean_f, w_b):
        """
        Compute the turning point associated with the current set of free weights F.

        :param covar_f_inv: (np.array) inverse of covariance matrix of free assets
        :param covar_fb: (np.array) covariance matrix between free assets and bounded assets
        :param mean_f: (np.array) expected returns of free assets
        :param w_b: (np.array) bounded asset weight values

        :return: (array, float) list of turning point weights and gamma value from the Lagrange equation
        """

        # Compute gamma
        ones_f = np.ones(mean_f.shape)
        g_1 = np.dot(np.dot(ones_f.T, covar_f_inv), mean_f)
        g_2 = np.dot(np.dot(ones_f.T, covar_f_inv), ones_f)
        if w_b is None:
            g_final, w_1 = self._as_float(-self.lambdas[-1] * g_1 / g_2 + 1 / g_2), 0
        else:
            ones_b = np.ones(w_b.shape)
            g_3 = np.dot(ones_b.T, w_b)
            g_4 = np.dot(covar_f_inv, covar_fb)
            w_1 = np.dot(g_4, w_b)
            g_4 = np.dot(ones_f.T, w_1)
            g_final = self._as_float(-self.lambdas[-1] * g_1 / g_2 +
                                     (1 - g_3 + g_4) / g_2)

        # Compute weights
        w_2 = np.dot(covar_f_inv, ones_f)
        w_3 = np.dot(covar_f_inv, mean_f)
        free_asset_weights = -1 * w_1 + g_final * w_2 + self.lambdas[-1] * w_3
        return free_asset_weights, g_final

    def _compute_lambda(self, covar_f_inv, covar_fb, mean_f, w_b, asset_index,
                        b_i):
        """
        Calculate the lambda value in the Lagrange optimisation equation.

        :param covar_f_inv: (np.array) inverse of covariance matrix of free assets
        :param covar_fb: (np.array) covariance matrix between free assets and bounded assets
        :param mean_f: (np.array) expected returns of free assets
        :param w_b: (np.array) bounded asset weight values
        :param asset_index: (int) position of the asset within the free set
        :param b_i: (list/float) either a [lower, upper] list from which the bound is chosen, or the bound value itself
        :return: (float, float) lambda value and the bound used; (None, None) when the C term is exactly zero
        """

        # Compute C
        ones_f = np.ones(mean_f.shape)
        c_1 = np.dot(np.dot(ones_f.T, covar_f_inv), ones_f)
        c_2 = np.dot(covar_f_inv, mean_f)
        c_3 = np.dot(np.dot(ones_f.T, covar_f_inv), mean_f)
        c_4 = np.dot(covar_f_inv, ones_f)
        c_final = -1 * c_1 * c_2[asset_index] + c_3 * c_4[asset_index]
        if c_final == 0:
            return None, None

        # Compute bi
        if isinstance(b_i, list):
            b_i = self._compute_bi(c_final, b_i)

        # Compute Lambda
        if w_b is None:

            # All free assets
            return self._as_float((c_4[asset_index] - c_1 * b_i) / c_final), b_i

        ones_b = np.ones(w_b.shape)
        l_1 = np.dot(ones_b.T, w_b)
        l_2 = np.dot(covar_f_inv, covar_fb)
        l_3 = np.dot(l_2, w_b)
        l_2 = np.dot(ones_f.T, l_3)
        lambda_value = self._as_float(((1 - l_1 + l_2) * c_4[asset_index] - c_1 *
                                       (b_i + l_3[asset_index])) / c_final)
        return lambda_value, b_i

    def _get_matrices(self, free_weights):
        """
        Calculate the required matrices between free and bounded assets.

        :param free_weights: (list) list of free assets/weights
        :return: (tuple of np.array matrices) covariance of the free assets, covariance between free and bounded
                 assets, expected returns of the free assets and the latest stored weights of the bounded assets
        """

        covar_f = self._reduce_matrix(self.cov_matrix, free_weights,
                                      free_weights)
        mean_f = self._reduce_matrix(self.expected_returns, free_weights, [0])
        bounded_weights = self._get_bounded_weights(free_weights)
        covar_fb = self._reduce_matrix(self.cov_matrix, free_weights,
                                       bounded_weights)
        w_b = self._reduce_matrix(self.weights[-1], bounded_weights, [0])
        return covar_f, covar_fb, mean_f, w_b

    def _get_bounded_weights(self, free_weights):
        """
        Compute the list of bounded assets.

        :param free_weights: (list) list of free weights/assets
        :return: (list) list of bounded assets/weights
        """

        return self._diff_lists(list(range(self.expected_returns.shape[0])),
                                free_weights)

    @staticmethod
    def _diff_lists(list_1, list_2):
        """
        Calculate the set difference between two lists.

        :param list_1: (list) a list of asset indices
        :param list_2: (list) another list of asset indices
        :return: (list) set difference between the two input lists
        """

        return list(set(list_1) - set(list_2))

    @staticmethod
    def _reduce_matrix(matrix, row_indices, col_indices):
        """
        Reduce a matrix to the provided set of rows and columns.

        :param matrix: (np.array) a matrix whose subset of rows and columns we need
        :param row_indices: (list) list of row indices for the matrix
        :param col_indices: (list) list of column indices for the matrix
        :return: (np.array) subset of input matrix
        """

        return matrix[np.ix_(row_indices, col_indices)]

    def _purge_num_err(self, tol):
        """
        Purge violations of inequality constraints (associated with ill-conditioned cov matrix).

        A turning point is dropped when its weights do not sum to one, or when any weight lies outside its
        bounds, by more than ``tol``.

        :param tol: (float) tolerance level for purging
        """

        index_1 = 0
        while index_1 < len(self.weights):
            point = self.weights[index_1]
            violated = np.any(abs(sum(point) - 1) > tol)
            if not violated:
                below_lower = point - self.lower_bounds < -tol
                above_upper = point - self.upper_bounds > tol
                violated = np.any(below_lower | above_upper)
            if violated:
                # Do not advance: the next turning point has shifted into this position
                self._drop_turning_point(index_1)
            else:
                index_1 += 1

    def _purge_excess(self):
        """
        Remove violations of the convex hull.

        Starting from the second stored turning point, a point is dropped when any later turning point has a
        strictly higher expected return. The first and the last turning points are never examined.
        """

        index_1 = 1
        while index_1 < len(self.weights) - 1:
            mean = np.dot(self.weights[index_1].T, self.expected_returns)[0, 0]
            dominated = any(
                mean < np.dot(later.T, self.expected_returns)[0, 0]
                for later in self.weights[index_1 + 1:])
            if dominated:
                # Do not advance: the next turning point has shifted into this position
                self._drop_turning_point(index_1)
            else:
                index_1 += 1

    @staticmethod
    def _golden_section(obj, left, right, **kwargs):
        """
        Golden section search for an extreme of ``obj`` on the interval [left, right].

        Supported keyword arguments:
            - ``minimum`` (bool): search for a minimum when True and for a maximum when False. Earlier versions
              ignored this flag and always maximised, so maximisation is kept as the default when it is omitted.
            - ``args`` (tuple): extra positional arguments forwarded to ``obj``. Defaults to no extra arguments.

        :param obj: (function) The objective function on which the extreme will be found.
        :param left: (float) The leftmost extreme of search
        :param right: (float) The rightmost extreme of search
        :return: (float, float) location of the extreme and the objective value at that location
        """

        tol = 1.0e-9

        # The search below always minimises; maximisation is done by flipping the objective's sign
        sign = 1 if kwargs.get("minimum", False) else -1
        args = kwargs.get("args")
        if args is None:
            args = ()

        # Degenerate interval: nothing to search, and the iteration count below would divide by zero
        if right == left:
            return left, obj(left, *args)

        num_iterations = int(ceil(-2.078087 * log(tol / abs(right - left))))
        gs_ratio = 0.618033989
        complementary_gs_ratio = 1.0 - gs_ratio

        # Initialize
        x_1 = gs_ratio * left + complementary_gs_ratio * right
        x_2 = complementary_gs_ratio * left + gs_ratio * right
        f_1 = sign * obj(x_1, *args)
        f_2 = sign * obj(x_2, *args)

        # Loop
        for _ in range(num_iterations):
            if f_1 > f_2:
                left = x_1
                x_1 = x_2
                f_1 = f_2
                x_2 = complementary_gs_ratio * left + gs_ratio * right
                f_2 = sign * obj(x_2, *args)
            else:
                right = x_2
                x_2 = x_1
                f_2 = f_1
                x_1 = gs_ratio * left + complementary_gs_ratio * right
                f_1 = sign * obj(x_1, *args)

        # Undo the sign flip so the caller receives the true objective value
        if f_1 < f_2:
            return x_1, sign * f_1
        return x_2, sign * f_2

    def _eval_sr(self, alpha, w_0, w_1):
        """
        Evaluate the ratio of expected return to volatility of the portfolio within the convex combination.

        :param alpha: (float) convex combination value
        :param w_0: (np.array) first endpoint of convex combination of weights
        :param w_1: (np.array) second endpoint of convex combination of weights
        :return: (float) expected return divided by volatility for ``alpha * w_0 + (1 - alpha) * w_1``
        """

        weights = alpha * w_0 + (1 - alpha) * w_1
        returns = np.dot(weights.T, self.expected_returns)[0, 0]
        volatility = np.dot(np.dot(weights.T, self.cov_matrix),
                            weights)[0, 0]**0.5
        return returns / volatility

    def _bound_free_weight(self, free_weights):
        """
        Add a free weight to list of bounded weights.

        :param free_weights: (list) list of free-weight indices
        :return: (float, int, int) lambda value, index of free weight to be bounded, bound weight value. All three
                 are None when there are fewer than two free weights
        """

        lambda_in = None
        i_in = None
        bi_in = None
        if len(free_weights) > 1:
            covar_f, covar_fb, mean_f, w_b = self._get_matrices(free_weights)
            covar_f_inv = np.linalg.inv(covar_f)
            for position, asset in enumerate(free_weights):
                lambda_i, b_i = self._compute_lambda(
                    covar_f_inv, covar_fb, mean_f, w_b, position,
                    [self.lower_bounds[asset], self.upper_bounds[asset]])

                # Keep the candidate with the largest lambda; None counts as -inf
                if self._infnone(lambda_i) > self._infnone(lambda_in):
                    lambda_in, i_in, bi_in = lambda_i, asset, b_i
        return lambda_in, i_in, bi_in

    def _free_bound_weight(self, free_weights):
        """
        Add a bounded weight to list of free weights.

        :param free_weights: (list) list of free-weight indices
        :return: (float, int) lambda value, index of the bounded weight to be made free. Both are None when every
                 asset is already free or no candidate qualifies
        """

        lambda_out = None
        i_out = None
        if len(free_weights) < self.expected_returns.shape[0]:
            bounded_weight_indices = self._get_bounded_weights(free_weights)
            for i in bounded_weight_indices:
                covar_f, covar_fb, mean_f, w_b = self._get_matrices(
                    free_weights + [i])
                covar_f_inv = np.linalg.inv(covar_f)
                lambda_i, _ = self._compute_lambda(
                    covar_f_inv,
                    covar_fb,
                    mean_f,
                    w_b,
                    mean_f.shape[0] - 1,
                    self.weights[-1][i],
                )
                if lambda_i is None:
                    # No lambda exists for this asset; comparing None with a float raises on Python 3
                    continue
                if (self.lambdas[-1] is None or lambda_i < self.lambdas[-1]
                    ) and lambda_i > self._infnone(lambda_out):
                    lambda_out, i_out = lambda_i, i
        return lambda_out, i_out

    def _initialise(self, asset_prices, expected_asset_returns,
                    covariance_matrix, resample_by):
        # pylint: disable=invalid-name, too-many-branches, bad-continuation
        """
        Initialise covariances, upper-bounds, lower-bounds and storage buffers.

        :param asset_prices: (pd.Dataframe) dataframe of asset prices indexed by date
        :param expected_asset_returns: (list) a list of mean stock returns (mu)
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns. When
                                              None, it is estimated from the asset prices
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. It is passed
                                  through unchanged to the returns estimator
        :raises ValueError: if expected returns must be estimated and the estimation method is unknown
        """

        # Calculate the returns if the user does not supply a returns matrix
        self.expected_returns = expected_asset_returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                self.expected_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                self.expected_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )

        # Store as a float column vector; a float dtype keeps the in-place nudge below valid for integer input
        self.expected_returns = np.array(self.expected_returns, dtype=float).reshape(
            (len(self.expected_returns), 1))

        # When every expected return is identical, nudge the last one so the values are not all equal
        if (self.expected_returns == np.ones(self.expected_returns.shape) *
                self.expected_returns.mean()).all():
            self.expected_returns[-1, 0] += 1e-5

        # Calculate the covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices, resample_by=resample_by)
            covariance_matrix = returns.cov()
        self.cov_matrix = np.asarray(covariance_matrix)

        # Initialise lower and upper bounds. Each side is inspected on its own, so a scalar bound on one side
        # can be combined with per-asset bounds on the other.
        self.lower_bounds = self._expand_bound(self.weight_bounds[0],
                                               self.expected_returns.shape)
        self.upper_bounds = self._expand_bound(self.weight_bounds[1],
                                               self.expected_returns.shape)

        # Initialise storage buffers
        self.weights = []
        self.lambdas = []
        self.gammas = []
        self.free_weights = []

    def allocate(self,
                 asset_names,
                 asset_prices=None,
                 expected_asset_returns=None,
                 covariance_matrix=None,
                 solution="cla_turning_points",
                 resample_by=None):
        # pylint: disable=consider-using-enumerate,too-many-locals,too-many-branches,too-many-statements,bad-continuation
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (adj closed)
        :param expected_asset_returns: (list) a list of mean stock returns (mu)
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns
        :param solution: (str) specify the type of solution to compute. Options are: cla_turning_points, max_sharpe,
                               min_volatility, efficient_frontier
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to
                                  None for no resampling
        :raises ValueError: if the inputs are insufficient or malformed, or the solution string is unknown
        """

        # Initial checks
        if asset_prices is None and (expected_asset_returns is None
                                     or covariance_matrix is None):
            raise ValueError(
                "Either supply your own asset returns matrix or pass the asset prices as input"
            )

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        # Some initial steps before the algorithm runs
        self._initialise(asset_prices=asset_prices,
                         resample_by=resample_by,
                         expected_asset_returns=expected_asset_returns,
                         covariance_matrix=covariance_matrix)

        # Compute the turning points, free sets and weights
        free_weights, weights = self._init_algo()
        self.weights.append(np.copy(weights))  # store solution
        self.lambdas.append(None)
        self.gammas.append(None)
        self.free_weights.append(free_weights[:])
        while True:

            # 1) Bound one free weight
            lambda_in, i_in, bi_in = self._bound_free_weight(free_weights)

            # 2) Free one bounded weight
            lambda_out, i_out = self._free_bound_weight(free_weights)

            # 3) Compute minimum variance solution
            if (lambda_in is None or lambda_in < 0) and (lambda_out is None
                                                         or lambda_out < 0):
                self.lambdas.append(0)
                covar_f, covar_fb, mean_f, w_b = self._get_matrices(
                    free_weights)
                covar_f_inv = np.linalg.inv(covar_f)
                mean_f = np.zeros(mean_f.shape)

            # 4) Decide whether to free a bounded weight or bound a free weight
            else:
                if self._infnone(lambda_in) > self._infnone(lambda_out):
                    self.lambdas.append(lambda_in)
                    free_weights.remove(i_in)
                    weights[i_in] = bi_in  # set value at the correct boundary
                else:
                    self.lambdas.append(lambda_out)
                    free_weights.append(i_out)
                covar_f, covar_fb, mean_f, w_b = self._get_matrices(
                    free_weights)
                covar_f_inv = np.linalg.inv(covar_f)

            # 5) Compute solution vector
            w_f, gamma = self._compute_w(covar_f_inv, covar_fb, mean_f, w_b)
            for position, asset in enumerate(free_weights):
                weights[asset] = w_f[position]
            self.weights.append(np.copy(weights))  # store solution
            self.gammas.append(gamma)
            self.free_weights.append(free_weights[:])

            # A lambda of zero marks the minimum variance solution, which is the last turning point
            if self.lambdas[-1] == 0:
                break

        # 6) Purge turning points
        self._purge_num_err(10e-10)
        self._purge_excess()

        # Compute the specified solution
        self._compute_solution(assets=asset_names, solution=solution)

    def _weights_as_frame(self, assets):
        """
        Flatten every stored column-vector of weights and stack them into a dataframe, one row per portfolio.

        :param assets: (list) a list of asset names, used as the column labels
        :return: (pd.DataFrame) dataframe of weights
        """

        rows = [point.reshape(1, -1)[0] for point in self.weights]
        return pd.DataFrame(rows, columns=assets)

    def _compute_solution(self, assets, solution):
        """
        Compute the desired solution to the portfolio optimisation problem and store the weights as a dataframe.

        :param assets: (list) a list of asset names
        :param solution: (str) specify the type of solution to compute. Options are: cla_turning_points, max_sharpe,
                               min_volatility, efficient_frontier
        :raises ValueError: if the solution string is not one of the supported options
        """

        if solution == "max_sharpe":
            self.max_sharpe, self.weights = self._max_sharpe()
            self.weights = pd.DataFrame(self.weights)
            self.weights.index = assets
            self.weights = self.weights.T
        elif solution == "min_volatility":
            self.min_var, self.weights = self._min_volatility()
            self.weights = pd.DataFrame(self.weights)
            self.weights.index = assets
            self.weights = self.weights.T
        elif solution == "efficient_frontier":
            (self.efficient_frontier_means, self.efficient_frontier_sigma,
             self.weights) = self._efficient_frontier()
            self.weights = self._weights_as_frame(assets)
        elif solution == "cla_turning_points":
            # Reshape the weight matrix
            self.weights = self._weights_as_frame(assets)
        else:
            raise ValueError(
                "Unknown solution string specified. Supported solutions - cla_turning_points, "
                "efficient_frontier, min_volatility, max_sharpe")

    def _max_sharpe(self):
        """
        Compute the maximum sharpe portfolio allocation.

        :return: (float, np.array) tuple of max. sharpe value and the set of weight allocations
        """

        # 1) Compute the local max SR portfolio between any two neighbor turning points
        w_sr, sharpe_ratios = [], []
        for i in range(len(self.weights) - 1):
            w_0 = np.copy(self.weights[i])
            w_1 = np.copy(self.weights[i + 1])
            kwargs = {"minimum": False, "args": (w_0, w_1)}
            alpha, sharpe_ratio = self._golden_section(self._eval_sr, 0, 1,
                                                       **kwargs)
            w_sr.append(alpha * w_0 + (1 - alpha) * w_1)
            sharpe_ratios.append(sharpe_ratio)

        # 2) Keep the best of the local maxima
        maximum_sharp_ratio = max(sharpe_ratios)
        weights_with_max_sharpe_ratio = w_sr[sharpe_ratios.index(
            maximum_sharp_ratio)]
        return maximum_sharp_ratio, weights_with_max_sharpe_ratio

    def _min_volatility(self):
        """
        Compute minimum volatility portfolio allocation.

        :return: (np.array, np.array) tuple of the minimum volatility (square root of the smallest variance, as a
                 single-element array) and the turning point weights that achieve it
        """

        variances = [
            np.dot(np.dot(weights.T, self.cov_matrix), weights)
            for weights in self.weights
        ]
        min_var = min(variances)
        return min_var**.5, self.weights[variances.index(min_var)]

    def _efficient_frontier(self, points=100):
        # pylint: disable=invalid-name
        """
        Compute the entire efficient frontier solution.

        Portfolios are generated by interpolating linearly between each pair of neighbouring turning points.

        :param points: (int) approximate total number of frontier points; each segment receives
                             ``points // number of turning points`` interpolation steps
        :return: tuple of mean, volatility and weights of the frontier solutions
        """

        means, sigma, weights = [], [], []

        # remove the 1, to avoid duplications
        partitions = np.linspace(0, 1, points // len(self.weights))[:-1]
        b = list(range(len(self.weights) - 1))
        for i in b:
            w_0, w_1 = self.weights[i], self.weights[i + 1]

            if i == b[-1]:
                # include the 1 in the last iteration
                partitions = np.linspace(0, 1, points // len(self.weights))

            for j in partitions:
                w = w_1 * j + (1 - j) * w_0
                weights.append(np.copy(w))
                means.append(np.dot(w.T, self.expected_returns)[0, 0])
                sigma.append(
                    np.dot(np.dot(w.T, self.cov_matrix), w)[0, 0]**0.5)
        return means, sigma, weights
class HierarchicalClusteringAssetAllocation:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended components mentioned in the
    following papers: `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio (August 23,
    2018). <https://ssrn.com/abstract=3237540>`_; and `Raffinot, Thomas, Hierarchical Clustering Based Asset Allocation (May 2017)
    <https://ssrn.com/abstract=2840729>`_;

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for assigning weights, the HERC
    algorithm proposed by Raffinot, allows investors to use other risk metrics like Expected Shortfall, Sharpe Ratio and
    Conditional Drawdown. Furthermore, it is flexible enough to be easily extended to include custom risk measures of our own.
    """
    def __init__(self,
                 calculate_expected_returns='mean',
                 confidence_level=0.05):
        """
        Create an allocator that holds no clustering results yet.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                        Currently supports "mean" and "exponential"
        :param confidence_level: (float) The confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk.
        """

        # Helper objects used for returns estimation and risk calculations
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()

        # User configuration
        self.calculate_expected_returns = calculate_expected_returns
        self.confidence_level = confidence_level

        # Results, filled in while allocating
        self.weights = []
        self.clusters = None
        self.cluster_children = None
        self.ordered_indices = None

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 expected_asset_returns=None,
                 allocation_metric='equal_weighting',
                 linkage='ward',
                 optimal_num_clusters=None):
        """
        Calculate asset allocations using the HCAA algorithm.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param allocation_metric: (str) The metric used for calculating weight allocations. Supported strings - "equal_weighting",
                                        "minimum_variance", "minimum_standard_deviation", "sharpe_ratio", "expected_shortfall",
                                        "conditional_drawdown_risk".
        :param linkage: (str) The type of linkage method to use for clustering. Supported strings - "single", "average", "complete"
                              and "ward".
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical clustering.
        """

        # Perform initial checks
        self._perform_checks(asset_prices, asset_returns,
                             expected_asset_returns, allocation_metric)

        # Calculate the expected returns if the user does not supply any returns (only required for sharpe_ratio allocation metric)
        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(
                    asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None:
            if allocation_metric in {'expected_shortfall', 'conditional_drawdown_risk'} or \
                    covariance_matrix is None or not optimal_num_clusters:
                asset_returns = self.returns_estimator.calculate_returns(
                    asset_prices=asset_prices)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix,
                           index=asset_names,
                           columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self._cov2corr(covariance=cov)

        # Calculate the optimal number of clusters using the Gap statistic
        if not optimal_num_clusters:
            optimal_num_clusters = self._get_optimal_number_of_clusters(
                correlation=corr, linkage=linkage, asset_returns=asset_returns)

        # Tree Clustering
        self.clusters, self.cluster_children = self._tree_clustering(
            correlation=corr,
            num_clusters=optimal_num_clusters,
            linkage=linkage)

        # Get the flattened order of assets in hierarchical clustering tree
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(
            num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        self._recursive_bisection(
            expected_asset_returns=expected_asset_returns,
            asset_returns=asset_returns,
            covariance_matrix=cov,
            assets=asset_names,
            allocation_metric=allocation_metric,
            optimal_num_clusters=optimal_num_clusters)

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the inertia of a cluster assignment.

        For every distinct label the columns of ``asset_returns`` carrying that label are passed to
        ``pairwise_distances`` and the resulting distances are averaged. The inertia is the natural log of the
        sum of these per-cluster averages.

        :param labels: (list) Cluster labels.
        :param asset_returns: (np.array) Historical asset returns; indexed as a 2-D array whose columns are the assets.
        :return: (float) Cluster inertia value.
        """

        mean_distances = []
        for label in np.unique(labels):
            members = asset_returns[:, labels == label]
            mean_distances.append(np.mean(pairwise_distances(members)))
        return np.log(np.sum(mean_distances))

    def _get_optimal_number_of_clusters(self,
                                        correlation,
                                        asset_returns,
                                        linkage,
                                        num_reference_datasets=5):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        Candidate cluster counts run from 1 up to min(10, number of asset columns). For each candidate the gap is the
        expected inertia of randomly generated reference data minus the inertia of the real data; the candidate with
        the largest gap is returned.

        :param correlation: (np.array) Matrix of asset correlations.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param linkage: (str) The type of linkage method to use for clustering.
        :param num_reference_datasets: (int) The number of reference datasets to generate for calculating expected inertia.
        :return: (int) The optimal number of clusters.
        """

        max_number_of_clusters = min(10, asset_returns.shape[1])
        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))

        # The tree built from the real data does not depend on the candidate cluster count, so it is built
        # once here; only the cut (fcluster) changes from one candidate to the next.
        original_clusters = scipy_linkage(squareform(original_distance_matrix),
                                          method=linkage)
        returns_matrix = asset_returns.values

        gap_values = []
        for num_clusters in range(1, max_number_of_clusters + 1):

            # Calculate expected inertia from reference datasets
            expected_inertia = self._calculate_expected_inertia(
                num_reference_datasets, asset_returns, num_clusters, linkage)

            # Calculate inertia from original data
            original_cluster_assignments = fcluster(original_clusters,
                                                    num_clusters,
                                                    criterion='maxclust')
            inertia = self._compute_cluster_inertia(
                original_cluster_assignments, returns_matrix)

            # Calculate the gap statistic
            gap_values.append(expected_inertia - inertia)

        # Candidates start at 1 while argmax is zero-based
        return 1 + np.argmax(gap_values)

    def _calculate_expected_inertia(self, num_reference_datasets,
                                    asset_returns, num_clusters, linkage):
        """
        Estimate the inertia expected from data without structure.

        Reference datasets with the same shape as ``asset_returns`` are drawn with ``np.random.rand``, clustered in the
        same way as the real data, and their inertias are averaged.

        :param num_reference_datasets: (int) The number of reference datasets to generate from the distribution.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param num_clusters: (int) The number of clusters to generate.
        :param linkage: (str) The type of linkage criterion to use for hierarchical clustering.
        :return: (float) The expected inertia from the reference datasets.
        """

        inertias = []
        for _ in range(num_reference_datasets):
            # Draw a reference sample and turn its correlations into distances
            sample = pd.DataFrame(np.random.rand(*asset_returns.shape))
            sample_correlation = np.array(sample.corr())
            sample_distances = np.sqrt(2 * (1 - sample_correlation).round(5))

            # Cluster the reference sample and cut the tree at the requested number of clusters
            sample_tree = scipy_linkage(squareform(sample_distances),
                                        method=linkage)
            sample_labels = fcluster(sample_tree,
                                     num_clusters,
                                     criterion='maxclust')

            inertias.append(
                self._compute_cluster_inertia(sample_labels, sample.values))
        return np.mean(inertias)

    @staticmethod
    def _tree_clustering(correlation, num_clusters, linkage):
        """
        Perform agglomerative clustering on the current portfolio.

        :param correlation: (np.array) Matrix of asset correlations.
        :param num_clusters: (int) The number of clusters.
        :param linkage (str): The type of linkage method to use for clustering.
        :return: (list) Structure of hierarchical tree.
        """

        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        clusters = scipy_linkage(squareform(distance_matrix.values),
                                 method=linkage)
        clustering_inds = fcluster(clusters,
                                   num_clusters,
                                   criterion='maxclust')
        cluster_children = {
            index - 1: []
            for index in range(min(clustering_inds),
                               max(clustering_inds) + 1)
        }
        for index, cluster_index in enumerate(clustering_inds):
            cluster_children[cluster_index - 1].append(index)
        return clusters, cluster_children

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) The total number of assets.
        :param curr_index: (int) Current index.
        :return: (list) The assets rearranged according to hierarchical clustering.
        """

        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) +
                self._quasi_diagnalization(num_assets, right))

    def _recursive_bisection(self, expected_asset_returns, asset_returns,
                             covariance_matrix, assets, allocation_metric,
                             optimal_num_clusters):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param assets: (list) List of asset names in the portfolio.
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        """

        num_assets = len(assets)
        self.weights = np.ones(shape=num_assets)
        clusters_contribution = np.ones(shape=optimal_num_clusters)
        clusters_weights = np.ones(shape=optimal_num_clusters)
        clusters_variance = np.ones(shape=optimal_num_clusters)

        # Calculate the corresponding risk measure for the clusters
        self._calculate_risk_contribution_of_clusters(
            clusters_contribution, clusters_variance, allocation_metric,
            optimal_num_clusters, covariance_matrix, expected_asset_returns,
            asset_returns)

        # Recursive bisection taking into account the dendrogram structure
        for cluster_index in range(optimal_num_clusters - 1):

            # Get the left and right cluster ids
            left_cluster_ids, right_cluster_ids = self._get_children_cluster_ids(
                num_assets=num_assets, parent_cluster_id=cluster_index)

            # Compute alpha
            left_cluster_contribution = np.sum(
                clusters_contribution[left_cluster_ids])
            right_cluster_contribution = np.sum(
                clusters_contribution[right_cluster_ids])
            if allocation_metric in {
                    'minimum_variance', 'minimum_standard_deviation',
                    'expected_shortfall', 'conditional_drawdown_risk'
            }:
                alloc_factor = 1 - left_cluster_contribution / (
                    left_cluster_contribution + right_cluster_contribution)
            elif allocation_metric == 'sharpe_ratio':
                alloc_factor = left_cluster_contribution / (
                    left_cluster_contribution + right_cluster_contribution)

                # If sharp ratio allocation factor is not within limits, then calculate normal cluster variance allocation
                # factor
                if alloc_factor < 0 or alloc_factor > 1:
                    left_cluster_variance = np.sum(
                        clusters_variance[left_cluster_ids])
                    right_cluster_variance = np.sum(
                        clusters_variance[right_cluster_ids])
                    alloc_factor = 1 - left_cluster_variance / (
                        left_cluster_variance + right_cluster_variance)
            else:
                alloc_factor = 0.5  # equal weighting

            # Assign weights to each sub-cluster
            clusters_weights[left_cluster_ids] *= alloc_factor
            clusters_weights[right_cluster_ids] *= 1 - alloc_factor

        # Compute the final weights
        self._calculate_final_portfolio_weights(clusters_weights,
                                                covariance_matrix,
                                                optimal_num_clusters)

        # Assign actual asset names to weight index
        self.weights = pd.DataFrame(self.weights)
        self.weights.index = assets[self.ordered_indices]
        self.weights = self.weights.T

    def _calculate_final_portfolio_weights(self, clusters_weights,
                                           covariance_matrix,
                                           optimal_num_clusters):
        """
        Calculate the final asset weights.

        :param clusters_weights: (np.array) The cluster weights calculated using recursive bisection.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        """

        for cluster_index in range(optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]
            cluster_covariance = covariance_matrix.iloc[cluster_asset_indices,
                                                        cluster_asset_indices]
            ivp_weights = self._get_inverse_variance_weights(
                cluster_covariance)
            self.weights[
                cluster_asset_indices] = ivp_weights * clusters_weights[
                    cluster_index]

    def _calculate_risk_contribution_of_clusters(
            self, clusters_contribution, clusters_variance, allocation_metric,
            optimal_num_clusters, covariance_matrix, expected_asset_returns,
            asset_returns):
        """
        Calculate the risk contribution of clusters based on the allocation metric.

        :param clusters_contribution: (np.array) The risk contribution value of the clusters.
        :param clusters_variance: (np.array) The variance of the clusters.
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        """

        for cluster_index in range(optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]

            if allocation_metric == 'minimum_variance':
                clusters_contribution[
                    cluster_index] = self._get_cluster_variance(
                        covariance_matrix, cluster_asset_indices)
            elif allocation_metric == 'minimum_standard_deviation':
                clusters_contribution[cluster_index] = np.sqrt(
                    self._get_cluster_variance(covariance_matrix,
                                               cluster_asset_indices))
            elif allocation_metric == 'sharpe_ratio':
                clusters_contribution[
                    cluster_index] = self._get_cluster_sharpe_ratio(
                        expected_asset_returns, covariance_matrix,
                        cluster_asset_indices)
                clusters_variance[cluster_index] = self._get_cluster_variance(
                    covariance_matrix, cluster_asset_indices)
            elif allocation_metric == 'expected_shortfall':
                clusters_contribution[
                    cluster_index] = self._get_cluster_expected_shortfall(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        cluster_indices=cluster_asset_indices)
            elif allocation_metric == 'conditional_drawdown_risk':
                clusters_contribution[
                    cluster_index] = self._get_cluster_conditional_drawdown_at_risk(
                        asset_returns=asset_returns,
                        covariance=covariance_matrix,
                        cluster_indices=cluster_asset_indices)

    def _get_children_cluster_ids(self, num_assets, parent_cluster_id):
        """
        Find the left and right children cluster id of the given parent cluster id.

        :param num_assets: (int) The number of assets in the portfolio.
        :param parent_cluster_index: (int) The current parent cluster id.
        :return: (list, list) List of cluster ids to the left and right of the parent cluster in the hierarchical tree.
        """

        left = int(self.clusters[num_assets - 2 - parent_cluster_id, 0])
        right = int(self.clusters[num_assets - 2 - parent_cluster_id, 1])
        left_cluster = self._quasi_diagnalization(num_assets, left)
        right_cluster = self._quasi_diagnalization(num_assets, right)

        left_cluster_ids = []
        right_cluster_ids = []
        for id_cluster, cluster in self.cluster_children.items():
            if sorted(self._intersection(left_cluster,
                                         cluster)) == sorted(cluster):
                left_cluster_ids.append(id_cluster)
            if sorted(self._intersection(right_cluster,
                                         cluster)) == sorted(cluster):
                right_cluster_ids.append(id_cluster)

        return left_cluster_ids, right_cluster_ids

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :return: (list) Inverse variance weight values.
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Variance of the cluster.
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(
            covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _get_cluster_sharpe_ratio(self, expected_asset_returns, covariance,
                                  cluster_indices):
        """
        Calculate cluster Sharpe Ratio.

        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Sharpe ratio of the cluster.
        """

        cluster_expected_returns = expected_asset_returns[cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(
            covariance=cluster_covariance, weights=parity_w)
        cluster_sharpe_ratio = (
            parity_w @ cluster_expected_returns) / np.sqrt(cluster_variance)
        return cluster_sharpe_ratio

    def _get_cluster_expected_shortfall(self, asset_returns, covariance,
                                        cluster_indices):
        """
        Calculate cluster expected shortfall.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Expected shortfall of the cluster.
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_expected_shortfall = self.risk_metrics.calculate_expected_shortfall(
            returns=portfolio_returns, confidence_level=self.confidence_level)
        return cluster_expected_shortfall

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns,
                                                  covariance, cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) CDD of the cluster.
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_conditional_drawdown = self.risk_metrics.calculate_conditional_drawdown_risk(
            returns=portfolio_returns, confidence_level=self.confidence_level)
        return cluster_conditional_drawdown

    @staticmethod
    def _intersection(list1, list2):
        """
        Calculate the intersection of two lists

        :param list1: (list) The first list of items.
        :param list2: (list) The second list of items.
        :return: (list) List containing the intersection of the input lists.
        """

        return list(set(list1) & set(list2))

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.DataFrame) Asset returns covariances.
        :return: (pd.DataFrame) Correlations between asset returns.
        """

        d_matrix = np.zeros_like(covariance)
        diagnoal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagnoal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr,
                            index=covariance.columns,
                            columns=covariance.columns)
        return corr

    @staticmethod
    def _perform_checks(asset_prices, asset_returns, expected_asset_returns,
                        allocation_metric):
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        """

        if asset_prices is None and asset_returns is None and expected_asset_returns is None:
            raise ValueError(
                "You need to supply either raw prices or returns or expected asset returns."
            )

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        if allocation_metric not in \
                {'minimum_variance', 'minimum_standard_deviation', 'sharpe_ratio',
                 'equal_weighting', 'expected_shortfall', 'conditional_drawdown_risk'}:
            raise ValueError(
                "Unknown allocation metric specified. Supported metrics are - minimum_variance, "
                "minimum_standard_deviation, sharpe_ratio, equal_weighting, expected_shortfall, "
                "conditional_drawdown_risk")

        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None and asset_prices is None:
            raise ValueError(
                "An expected asset returns list is required for sharpe ratio metric. Either provide pre-calculated"
                "expected asset returns or give raw asset prices for inbuilt returns calculation."
            )