class MeanVarianceOptimisation:
    """
    This class implements some classic mean-variance optimisation techniques for calculating the
    efficient frontier solutions. With the help of quadratic optimisers, users can generate optimal
    portfolios for different objective functions.

    Currently solutions to the following portfolios can be generated:

    1. Inverse Variance
    2. Maximum Sharpe
    3. Minimum Volatility
    4. Efficient Risk

    After a call to :meth:`allocate` the results are available as ``weights``, ``portfolio_risk``,
    ``portfolio_return`` and ``portfolio_sharpe_ratio``.

    NOTE(review): ``portfolio_risk`` holds the portfolio *variance* for the ``inverse_variance``
    solution and the portfolio *volatility* (square root of the variance) for the solver based
    solutions. The mixed units are kept for backward compatibility - confirm the intended unit.
    """

    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
                                           Currently supports "mean" and "exponential"
        """
        self.weights = []
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.weight_bounds = None

    def allocate(self, asset_names, asset_prices=None, expected_asset_returns=None,
                 covariance_matrix=None, solution='inverse_variance', risk_free_rate=0.05,
                 target_return=0.2, weight_bounds=(0, 1), resample_by=None):
        # pylint: disable=invalid-name, too-many-branches, bad-continuation
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (daily close)
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of
                                  asset returns (sigma)
        :param solution: (str) the type of solution/algorithm to use to calculate the weights.
                         Currently supported solution strings - inverse_variance, min_volatility,
                         max_sharpe and efficient_risk
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param target_return: (float) target return of the portfolio
        :param weight_bounds: (dict/tuple) can be either a single tuple of upper and lower bounds for
                              all portfolio weights or a dictionary mapping of individual asset indices
                              to tuples of upper and lower bounds. Those indices which do not have any
                              mapping will have a (0, 1) default bound.
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc..
                            Defaults to None for no resampling
        :raises ValueError: if neither raw prices nor both expected returns and a covariance matrix
                            are supplied, if the prices are not a date-indexed dataframe, or if the
                            returns method / solution string is unknown
        """
        # Without prices nothing can be estimated, so both estimates must be supplied by the user.
        if asset_prices is None and (expected_asset_returns is None or covariance_matrix is None):
            raise ValueError(
                "You need to supply either raw prices or expected returns "
                "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        # Forget the results of any previous call so that stale values are never reported.
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None

        # Weight bounds
        self.weight_bounds = weight_bounds

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")
        expected_asset_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices,
                                                               resample_by=resample_by)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        num_assets = len(asset_names)
        if solution == 'inverse_variance':
            self.weights = self._inverse_variance(covariance=cov)
        elif solution == 'min_volatility':
            self.weights, self.portfolio_risk = self._min_volatility(covariance=cov,
                                                                     num_assets=num_assets)
        elif solution == 'max_sharpe':
            self.weights, self.portfolio_risk, self.portfolio_return = self._max_sharpe(
                covariance=cov,
                expected_returns=expected_asset_returns,
                risk_free_rate=risk_free_rate,
                num_assets=num_assets)
        elif solution == 'efficient_risk':
            self.weights, self.portfolio_risk, self.portfolio_return = \
                self._min_volatility_for_target_return(covariance=cov,
                                                       expected_returns=expected_asset_returns,
                                                       target_return=target_return,
                                                       num_assets=num_assets)
        else:
            raise ValueError(
                "Unknown solution string specified. Supported solutions - "
                "inverse_variance, min_volatility, max_sharpe and efficient_risk.")

        # Round weights which are very very small negative numbers (e.g. -4.7e-16) to 0
        negative_weights = self.weights < 0
        self.weights[negative_weights] = np.round(self.weights[negative_weights], 3)

        # Variance of the final portfolio; always derived from the final weights so that the
        # Sharpe ratio below uses a true volatility whatever unit the solver reported.
        portfolio_variance = np.dot(self.weights, np.dot(cov.values, self.weights.T))

        # Calculate the portfolio risk and return if it has not been calculated
        if self.portfolio_risk is None:
            self.portfolio_risk = portfolio_variance
        if self.portfolio_return is None:
            self.portfolio_return = np.dot(self.weights, expected_asset_returns)
        self.portfolio_sharpe_ratio = (
            (self.portfolio_return - risk_free_rate) / (portfolio_variance ** 0.5))

        # Expose the weights as a single-row dataframe with one column per asset
        self.weights = pd.DataFrame(self.weights)
        self.weights.index = asset_names
        self.weights = self.weights.T

    @staticmethod
    def _inverse_variance(covariance):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :return: (np.array) array of portfolio weights
        """
        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        return ivp

    def _weight_bound_constraints(self, variable, num_assets, scale=None):
        """
        Build the box constraints implied by ``self.weight_bounds`` for an optimisation variable.

        :param variable: (cp.Variable) the optimisation variable to bound
        :param num_assets: (int) number of assets in the portfolio
        :param scale: (cp.Variable) optional scaling variable. When given, both bounds are
                      multiplied by it and the upper bound is used as supplied. When None, the
                      bounds are applied directly and the upper bound is capped at 1.
        :return: (list) cvxpy constraints; empty if the weight bounds are neither a tuple nor a dict
        """
        if isinstance(self.weight_bounds, tuple):
            bounded = [(variable, self.weight_bounds[0], self.weight_bounds[1])]
        elif isinstance(self.weight_bounds, dict):
            # Assets without an explicit entry fall back to the (0, 1) default bound
            bounded = [(variable[asset_index],) + tuple(self.weight_bounds.get(asset_index, (0, 1)))
                       for asset_index in range(num_assets)]
        else:
            return []

        constraints = []
        for target, lower_bound, upper_bound in bounded:
            if scale is None:
                constraints.extend([target >= lower_bound, target <= min(upper_bound, 1)])
            else:
                constraints.extend([target >= scale * lower_bound, target <= scale * upper_bound])
        return constraints

    def _min_volatility(self, covariance, num_assets):
        """
        Compute minimum volatility portfolio allocation.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float) portfolio weights and risk value (square root of the minimised
                 quadratic form)
        :raises ValueError: if the solver does not produce a set of weights
        """
        weights = cp.Variable(num_assets)
        # Equal weights are used as the warm-start point
        weights.value = np.array([1 / num_assets] * num_assets)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [cp.sum(weights) == 1]
        allocation_constraints.extend(self._weight_bound_constraints(weights, num_assets))

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective, constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')
        return weights.value, risk.value ** 0.5

    def _max_sharpe(self, covariance, expected_returns, risk_free_rate, num_assets):
        # pylint: disable=invalid-name
        """
        Compute maximum Sharpe portfolio allocation.

        The problem is solved in the scaled variable ``y`` with ``sum(y) == kappa``; the portfolio
        weights are recovered as ``y / kappa``.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (np.array) column vector of mean stock returns (mu)
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, risk value and return value
        :raises ValueError: if the solver does not produce a solution
        """
        y = cp.Variable(num_assets)
        y.value = np.array([1 / num_assets] * num_assets)
        kappa = cp.Variable(1)
        risk = cp.quad_form(y, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa,
            kappa >= 0
        ]
        allocation_constraints.extend(self._weight_bound_constraints(y, num_assets, scale=kappa))

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective, constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')

        weights = y.value / kappa.value
        portfolio_return = (expected_returns.T @ weights)[0]
        # The optimised quadratic form is expressed in y, not in the normalised weights, so the
        # risk is evaluated on the final weights instead of reusing risk.value.
        portfolio_risk = float(np.sqrt(weights @ np.asarray(covariance) @ weights))
        return weights, portfolio_risk, portfolio_return

    def _min_volatility_for_target_return(self, covariance, expected_returns, target_return,
                                          num_assets):
        """
        Calculate minimum volatility portfolio for a given target return.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_returns: (np.array) column vector of mean stock returns (mu)
        :param target_return: (float) target return of the portfolio
        :param num_assets: (int) number of assets in the portfolio
        :return: (np.array, float, float) portfolio weights, risk value (square root of the
                 minimised quadratic form) and the target return
        :raises ValueError: if the solver does not produce a set of weights
        """
        weights = cp.Variable(num_assets)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
            (expected_returns.T @ weights)[0] == target_return,
        ]
        allocation_constraints.extend(self._weight_bound_constraints(weights, num_assets))

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective, constraints=allocation_constraints)
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')
        return weights.value, risk.value ** 0.5, target_return

    def plot_efficient_frontier(self, covariance, expected_asset_returns, num_assets, min_return=0,
                                max_return=0.4, risk_free_rate=0.05):
        # pylint: disable=bad-continuation, broad-except
        """
        Plot the Markowitz efficient frontier.

        :param covariance: (pd.Dataframe) covariance dataframe of asset returns
        :param expected_asset_returns: (list/np.array/pd.dataframe) a list of mean stock returns (mu)
        :param num_assets: (int) number of assets in the portfolio
        :param min_return: (float) minimum target return
        :param max_return: (float) maximum target return
        :param risk_free_rate: (float) the rate of return for a risk-free asset.
        :return: the object returned by ``plt.scatter`` for the frontier points
        """
        expected_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            _, risk, _ = self._min_volatility_for_target_return(covariance=covariance,
                                                                expected_returns=expected_returns,
                                                                target_return=portfolio_return,
                                                                num_assets=num_assets)
            volatilities.append(risk)
            returns.append(portfolio_return)
            # risk is already a volatility; the small epsilon guards against division by zero
            sharpe_ratios.append((portfolio_return - risk_free_rate) / (risk + 1e-16))

        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))

        figure = plt.scatter(volatilities, returns, c=sharpe_ratios, cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure
class HierarchicalClusteringAssetAllocation:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended
    components mentioned in the following papers:
    `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio (August 23, 2018).
    <https://ssrn.com/abstract=3237540>`_; and
    `Raffinot, Thomas, Hierarchical Clustering Based Asset Allocation (May 2017)
    <https://ssrn.com/abstract=2840729>`_;

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for
    assigning weights, the HERC algorithm proposed by Raffinot, allows investors to use other risk
    metrics like Expected Shortfall, Sharpe Ratio and Conditional Drawdown. Furthermore, it is
    flexible enough to be easily extended to include custom risk measures of our own.
    """

    def __init__(self, calculate_expected_returns='mean'):
        """
        Constructor.

        :param calculate_expected_returns: (str) the method to use for calculation of expected returns.
                                           Currently supports "mean" and "exponential"
        """
        self.weights = []
        self.clusters = None
        self.ordered_indices = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()
        self.calculate_expected_returns = calculate_expected_returns

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the cluster inertia (log of the summed mean pairwise distances per cluster).

        :param labels: (np.array) cluster label of each asset (one per column of asset_returns)
        :param asset_returns: (np.array) historical asset returns, one column per asset
        :return: (float) cluster inertia value
        """
        unique_labels = np.unique(labels)
        inertia = [np.mean(pairwise_distances(asset_returns[:, labels == label]))
                   for label in unique_labels]
        return np.log(np.sum(inertia))

    def _get_optimal_number_of_clusters(self, correlation, asset_returns, num_reference_datasets=5,
                                        max_number_of_clusters=10):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        :param correlation: (np.array) matrix of asset correlations
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param num_reference_datasets: (int) the number of reference datasets to generate for
                                       calculating expected inertia
        :param max_number_of_clusters: (int) the maximum number of clusters to check for finding the
                                       optimal value
        :return: (int) the optimal number of clusters
        """
        # NOTE(review): recent scikit-learn releases renamed ``affinity`` to ``metric`` - confirm
        # against the pinned version before upgrading.
        cluster_func = AgglomerativeClustering(affinity='precomputed', linkage='single')
        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))

        # There cannot be more clusters than assets being clustered
        max_number_of_clusters = min(max_number_of_clusters, asset_returns.shape[1])

        gap_values = []
        for num_clusters in range(1, max_number_of_clusters + 1):
            cluster_func.n_clusters = num_clusters

            # Calculate expected inertia from reference datasets
            reference_inertias = []
            for _ in range(num_reference_datasets):
                # Generate reference returns from uniform distribution and calculate the distance matrix.
                reference_asset_returns = pd.DataFrame(np.random.rand(*asset_returns.shape))
                reference_correlation = np.array(reference_asset_returns.corr())
                reference_distance_matrix = np.sqrt(2 * (1 - reference_correlation).round(5))

                reference_cluster_assignments = cluster_func.fit_predict(reference_distance_matrix)
                reference_inertias.append(
                    self._compute_cluster_inertia(reference_cluster_assignments,
                                                  reference_asset_returns.values))
            expected_inertia = np.mean(reference_inertias)

            # Calculate inertia from original data
            original_cluster_assignments = cluster_func.fit_predict(original_distance_matrix)
            inertia = self._compute_cluster_inertia(original_cluster_assignments,
                                                    asset_returns.values)

            # Calculate the gap statistic
            gap_values.append(expected_inertia - inertia)

        # gap_values[i] belongs to (i + 1) clusters, so shift the winning index by one
        return 1 + int(np.argmax(gap_values))

    @staticmethod
    def _tree_clustering(correlation, num_clusters):
        """
        Perform agglomerative clustering on the current portfolio.

        :param correlation: (np.array) matrix of asset correlations
        :param num_clusters: (int) the number of clusters
        :return: (np.array) the ``children_`` attribute of the fitted clustering model
        """
        cluster_func = AgglomerativeClustering(n_clusters=num_clusters, affinity='precomputed',
                                               linkage='single')
        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        cluster_func.fit(distance_matrix)
        return cluster_func.children_

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        Indices below ``num_assets`` are leaves (assets); any other index ``i`` is a merged node whose
        two children are stored in row ``i - num_assets`` of ``self.clusters``. The tree is walked
        with an explicit stack so that deep, chain-like trees cannot exhaust the recursion limit.

        :param num_assets: (int) the total number of assets
        :param curr_index: (int) index of the node to start from
        :return: (list) the assets rearranged according to hierarchical clustering
        """
        ordered = []
        pending = [curr_index]
        while pending:
            node = pending.pop()
            if node < num_assets:
                ordered.append(node)
                continue
            left = int(self.clusters[node - num_assets, 0])
            right = int(self.clusters[node - num_assets, 1])
            # Push right first so that the left subtree is emitted first
            pending.append(right)
            pending.append(left)
        return ordered

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :return: (np.array) inverse variance weight values
        """
        inv_diag = 1 / np.diag(covariance.values)
        return inv_diag * (1 / np.sum(inv_diag))

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) variance of the cluster
        """
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        return self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)

    def _get_cluster_sharpe_ratio(self, expected_asset_returns, covariance, cluster_indices):
        """
        Calculate cluster Sharpe Ratio.

        :param expected_asset_returns: (list/np.array/pd.Series) mean asset returns (mu), ordered
                                       like the rows of the covariance matrix
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) sharpe ratio of the cluster
        """
        # Flatten to a positional array so that lists, column vectors and labelled series all work
        cluster_expected_returns = np.asarray(expected_asset_returns).reshape(-1)[cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance,
                                                                weights=parity_w)
        return (parity_w @ cluster_expected_returns) / np.sqrt(cluster_variance)

    def _get_cluster_expected_shortfall(self, asset_returns, covariance, confidence_level,
                                        cluster_indices):
        """
        Calculate cluster expected shortfall.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) expected shortfall of the cluster
        """
        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        return self.risk_metrics.calculate_expected_shortfall(returns=portfolio_returns,
                                                              confidence_level=confidence_level)

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns, covariance, confidence_level,
                                                  cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance: (pd.DataFrame) covariance matrix of assets
        :param confidence_level: (float) the confidence level (alpha)
        :param cluster_indices: (list) list of asset indices for the cluster
        :return: (float) CDD of the cluster
        """
        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        return self.risk_metrics.calculate_conditional_drawdown_risk(
            returns=portfolio_returns, confidence_level=confidence_level)

    def _variance_allocation_factor(self, covariance_matrix, left_cluster, right_cluster):
        """
        Share of weight given to the left cluster under inverse-variance splitting.

        :param covariance_matrix: (pd.DataFrame) the covariance matrix
        :param left_cluster: (list) asset indices of the left cluster
        :param right_cluster: (list) asset indices of the right cluster
        :return: (float) allocation factor of the left cluster
        """
        left_variance = self._get_cluster_variance(covariance_matrix, left_cluster)
        right_variance = self._get_cluster_variance(covariance_matrix, right_cluster)
        return 1 - left_variance / (left_variance + right_variance)

    def _allocation_factor(self, left_cluster, right_cluster, expected_asset_returns, asset_returns,
                           covariance_matrix, allocation_metric, confidence_level):
        # pylint: disable=too-many-arguments
        """
        Calculate the share of weight given to the left of two sibling clusters.

        :param left_cluster: (list) asset indices of the left cluster
        :param right_cluster: (list) asset indices of the right cluster
        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance_matrix: (pd.DataFrame) the covariance matrix
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha)
        :return: (float) allocation factor of the left cluster; the right one gets the remainder
        """
        if allocation_metric == 'minimum_variance':
            return self._variance_allocation_factor(covariance_matrix, left_cluster, right_cluster)

        if allocation_metric == 'minimum_standard_deviation':
            left_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, left_cluster))
            right_sd = np.sqrt(self._get_cluster_variance(covariance_matrix, right_cluster))
            return 1 - left_sd / (left_sd + right_sd)

        if allocation_metric == 'sharpe_ratio':
            left_sharpe = self._get_cluster_sharpe_ratio(expected_asset_returns, covariance_matrix,
                                                         left_cluster)
            right_sharpe = self._get_cluster_sharpe_ratio(expected_asset_returns, covariance_matrix,
                                                          right_cluster)
            alloc_factor = left_sharpe / (left_sharpe + right_sharpe)
            # Fall back to variance splitting when the ratio is not a valid share; written as a
            # negated range check so that NaN (e.g. from a zero denominator) is caught too.
            if not 0 <= alloc_factor <= 1:
                alloc_factor = self._variance_allocation_factor(covariance_matrix, left_cluster,
                                                                right_cluster)
            return alloc_factor

        if allocation_metric == 'expected_shortfall':
            left_es = self._get_cluster_expected_shortfall(asset_returns=asset_returns,
                                                           covariance=covariance_matrix,
                                                           confidence_level=confidence_level,
                                                           cluster_indices=left_cluster)
            right_es = self._get_cluster_expected_shortfall(asset_returns=asset_returns,
                                                            covariance=covariance_matrix,
                                                            confidence_level=confidence_level,
                                                            cluster_indices=right_cluster)
            return 1 - left_es / (left_es + right_es)

        if allocation_metric == 'conditional_drawdown_risk':
            left_cdd = self._get_cluster_conditional_drawdown_at_risk(
                asset_returns=asset_returns,
                covariance=covariance_matrix,
                confidence_level=confidence_level,
                cluster_indices=left_cluster)
            right_cdd = self._get_cluster_conditional_drawdown_at_risk(
                asset_returns=asset_returns,
                covariance=covariance_matrix,
                confidence_level=confidence_level,
                cluster_indices=right_cluster)
            return 1 - left_cdd / (left_cdd + right_cdd)

        return 0.5  # equal weighting

    def _recursive_bisection(self, expected_asset_returns, asset_returns, covariance_matrix, assets,
                             allocation_metric, confidence_level):
        # pylint: disable=bad-continuation, too-many-locals
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual
        assets. The result is stored in ``self.weights`` as a single-row dataframe whose columns are
        the asset names in ``self.ordered_indices`` order.

        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param asset_returns: (pd.DataFrame) historical asset returns
        :param covariance_matrix: (pd.DataFrame) the covariance matrix
        :param assets: (list/pd.Index) asset names in the portfolio
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha)
        """
        # Float dtype from the start: the weights are scaled by fractional factors below
        self.weights = pd.Series(1.0, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            # Halve every cluster that still holds more than one asset; singletons drop out
            clustered_alphas = [cluster[start:end]
                                for cluster in clustered_alphas
                                for start, end in ((0, len(cluster) // 2),
                                                   (len(cluster) // 2, len(cluster)))
                                if len(cluster) > 1]

            # Halves are stored pairwise: even positions are left halves, odd ones right halves
            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Calculate allocation factor based on the metric
                alloc_factor = self._allocation_factor(left_cluster=left_cluster,
                                                       right_cluster=right_cluster,
                                                       expected_asset_returns=expected_asset_returns,
                                                       asset_returns=asset_returns,
                                                       covariance_matrix=covariance_matrix,
                                                       allocation_metric=allocation_metric,
                                                       confidence_level=confidence_level)

                # Assign weights to each sub-cluster
                self.weights.loc[left_cluster] *= alloc_factor
                self.weights.loc[right_cluster] *= 1 - alloc_factor

        # Assign actual asset values to weight index (works for plain lists as well as pd.Index)
        self.weights.index = [assets[asset_index] for asset_index in self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)
        self.weights = self.weights.T

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.DataFrame) asset returns covariances
        :return: (pd.DataFrame) correlations between asset returns
        """
        # corr[i, j] = cov[i, j] / (sd[i] * sd[j])
        standard_deviations = np.sqrt(np.diag(covariance))
        corr = np.asarray(covariance, dtype=float) / np.outer(standard_deviations,
                                                              standard_deviations)
        return pd.DataFrame(corr, index=covariance.columns, columns=covariance.columns)

    @staticmethod
    def _perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric):
        # pylint: disable=bad-continuation
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                             indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of
                                  asset returns
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :raises ValueError: if no input data is supplied, the prices are not a date-indexed
                            dataframe, or the allocation metric is unknown
        """
        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError("You need to supply either raw prices or returns or a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        supported_metrics = {'minimum_variance', 'minimum_standard_deviation', 'sharpe_ratio',
                             'equal_weighting', 'expected_shortfall', 'conditional_drawdown_risk'}
        if allocation_metric not in supported_metrics:
            raise ValueError("Unknown allocation metric specified. Supported metrics are - minimum_variance, "
                             "minimum_standard_deviation, sharpe_ratio, equal_weighting, expected_shortfall, "
                             "conditional_drawdown_risk")

    def allocate(self, asset_names, asset_prices=None, asset_returns=None, covariance_matrix=None,
                 expected_asset_returns=None, allocation_metric='equal_weighting',
                 confidence_level=0.05, optimal_num_clusters=None, resample_by=None):
        # pylint: disable=too-many-arguments
        """
        Calculate asset allocations using the HCAA algorithm.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.DataFrame) a dataframe of historical asset prices (daily close)
                             indexed by date
        :param asset_returns: (pd.DataFrame/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.DataFrame/numpy matrix) user supplied covariance matrix of
                                  asset returns
        :param expected_asset_returns: (list) a list of mean asset returns (mu)
        :param allocation_metric: (str) the metric used for calculating weight allocations
        :param confidence_level: (float) the confidence level (alpha) used for calculating expected
                                 shortfall and conditional drawdown at risk
        :param optimal_num_clusters: (int) optimal number of clusters for hierarchical clustering
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc..
                            Defaults to None for no resampling
        :raises ValueError: if the inputs fail the initial checks, or if data needed by the chosen
                            options (expected returns, asset returns) can neither be found nor derived
        """
        # Perform initial checks
        self._perform_checks(asset_prices, asset_returns, covariance_matrix, allocation_metric)

        # Calculate the expected returns if the user does not supply any returns
        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None:
            if asset_prices is None:
                raise ValueError(
                    "Either provide pre-calculated expected returns or give raw asset prices for inbuilt returns calculation")

            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices, resample_by=resample_by)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")

        # Calculate the returns if the user does not supply a returns dataframe. With a covariance
        # matrix only, there is nothing to derive returns from and they stay undefined.
        if asset_returns is None and asset_prices is not None:
            asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices,
                                                                     resample_by=resample_by)
        if asset_returns is not None:
            asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Returns are needed by the Gap statistic and by the return-distribution based metrics
        needs_returns = (not optimal_num_clusters
                         or allocation_metric in {'expected_shortfall', 'conditional_drawdown_risk'})
        if asset_returns is None and needs_returns:
            raise ValueError("Asset returns or raw prices are required when the number of clusters is not "
                             "given or when using expected_shortfall / conditional_drawdown_risk")

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self._cov2corr(covariance=cov)

        # Calculate the optimal number of clusters using the Gap statistic
        if not optimal_num_clusters:
            optimal_num_clusters = self._get_optimal_number_of_clusters(correlation=corr,
                                                                        asset_returns=asset_returns)

        # Tree Clustering
        self.clusters = self._tree_clustering(correlation=corr, num_clusters=optimal_num_clusters)

        # Quasi Diagnalization; 2 * num_assets - 2 is passed as the index of the tree root
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        self._recursive_bisection(expected_asset_returns=expected_asset_returns,
                                  asset_returns=asset_returns,
                                  covariance_matrix=cov,
                                  assets=asset_names,
                                  allocation_metric=allocation_metric,
                                  confidence_level=confidence_level)
class MeanVarianceOptimisation:  # pylint: disable=too-many-instance-attributes
    """
    This class implements some classic mean-variance optimisation techniques for calculating the
    efficient frontier solutions. With the help of quadratic optimisers, users can generate optimal
    portfolios for different objective functions. Currently solutions to the following portfolios
    can be generated:

        1. Inverse Variance
        2. Maximum Sharpe
        3. Minimum Volatility
        4. Efficient Risk
        5. Maximum Return - Minimum Volatility
        6. Efficient Return
        7. Maximum Diversification
        8. Maximum Decorrelation
        9. Custom Objective Function

    After a successful allocation the results are available on the instance as ``weights``
    (a one-row dataframe with one column per asset), ``portfolio_risk`` (the quadratic form
    w' * Sigma * w), ``portfolio_return`` and ``portfolio_sharpe_ratio``.
    """

    def __init__(self, calculate_expected_returns='mean', risk_free_rate=0.03):
        """
        Constructor.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports "mean" and "exponential".
        :param risk_free_rate: (float) The rate of return for a risk-free asset. Used by the maximum
                                       Sharpe solution and when computing the portfolio Sharpe ratio.
        """

        self.weights = []
        self.asset_names = None
        self.num_assets = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimation()
        self.risk_estimators = RiskEstimators()
        self.weight_bounds = (0, 1)
        self.risk_free_rate = risk_free_rate

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 expected_asset_returns=None,
                 covariance_matrix=None,
                 solution='inverse_variance',
                 target_return=0.2,
                 target_risk=0.01,
                 risk_aversion=10,
                 weight_bounds=None):
        # pylint: disable=invalid-name, too-many-branches
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe,
                               efficient_risk, max_return_min_volatility, max_diversification, efficient_return
                               and max_decorrelation.
        :param target_return: (float) Target return of the portfolio.
        :param target_risk: (float) Target risk of the portfolio.
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa.
        :param weight_bounds: (tuple/list) Can be either a single tuple of lower and upper bounds for all portfolio
                                           weights or a list of strings with each string representing an inequality
                                           on the weights. For e.g. to bound the weight of the 3rd asset pass the
                                           following weight bounds: ['weights[2] <= 0.3', 'weights[2] >= 0.1'].
                                           When given, the bounds are stored on the instance and therefore also
                                           apply to later calls that do not pass ``weight_bounds``.
        :raises ValueError: If the inputs are insufficient/invalid, the solution string is unknown or the
                            optimiser finds no set of weights.
        """

        self._error_checks(asset_names, asset_prices, expected_asset_returns, covariance_matrix, solution)

        # Weight bounds
        if weight_bounds is not None:
            self.weight_bounds = weight_bounds

        # Calculate the expected asset returns and covariance matrix if not given by the user
        expected_asset_returns, cov = self._calculate_estimators(asset_prices,
                                                                 expected_asset_returns,
                                                                 covariance_matrix)

        if solution == 'inverse_variance':
            self._inverse_variance(covariance=cov, expected_returns=expected_asset_returns)
        elif solution == 'min_volatility':
            self._min_volatility(covariance=cov, expected_returns=expected_asset_returns)
        elif solution == 'max_return_min_volatility':
            self._max_return_min_volatility(covariance=cov,
                                            expected_returns=expected_asset_returns,
                                            risk_aversion=risk_aversion)
        elif solution == 'max_sharpe':
            self._max_sharpe(covariance=cov, expected_returns=expected_asset_returns)
        elif solution == 'efficient_risk':
            self._min_volatility_for_target_return(covariance=cov,
                                                   expected_returns=expected_asset_returns,
                                                   target_return=target_return)
        elif solution == 'efficient_return':
            self._max_return_for_target_risk(covariance=cov,
                                             expected_returns=expected_asset_returns,
                                             target_risk=target_risk)
        elif solution == 'max_diversification':
            self._max_diversification(covariance=cov, expected_returns=expected_asset_returns)
        else:
            # _error_checks() has already rejected unknown strings, so this is 'max_decorrelation'
            self._max_decorrelation(covariance=cov, expected_returns=expected_asset_returns)

        # Calculate the portfolio sharpe ratio (portfolio_risk is a variance, hence the square root)
        self.portfolio_sharpe_ratio = ((self.portfolio_return - self.risk_free_rate) / (self.portfolio_risk ** 0.5))

        # Do some post-processing of the weights
        self._post_process_weights()

    def allocate_custom_objective(self,
                                  custom_objective,
                                  asset_names=None,
                                  asset_prices=None,
                                  expected_asset_returns=None,
                                  covariance_matrix=None,
                                  target_return=0.2,
                                  target_risk=0.01,
                                  risk_aversion=10):
        # pylint: disable=eval-used, too-many-locals, unused-argument
        """
        Create a portfolio using custom objective and constraints.

        The objective and constraint strings are evaluated with ``eval`` inside this method, so they may
        refer to its local names: ``weights``, ``risk``, ``portfolio_return``, ``expected_asset_returns``,
        ``cov``, ``target_return``, ``target_risk`` and ``risk_aversion`` (as well as ``cp`` and ``np``).

        :param custom_objective: (dict) A custom objective function with custom constraints. You need to write it in
                                        the form expected by cvxpy. The objective will be a single string while the
                                        constraints can be a list of strings specifying the constraints. For e.g.
                                        {'objective': 'cp.Minimize(risk)',
                                        'constraints': ['cp.sum(weights) == 1', 'weights >= 0', 'weights <= 1']}.
        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param target_return: (float) Target return of the portfolio.
        :param target_risk: (float) Target risk of the portfolio.
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa.
        :raises ValueError: If the inputs are insufficient/invalid or no optimal set of weights is found.
        """

        self._error_checks(asset_names, asset_prices, expected_asset_returns, covariance_matrix)

        # Calculate the expected asset returns and covariance matrix if not given by the user
        expected_asset_returns, cov = self._calculate_estimators(asset_prices,
                                                                 expected_asset_returns,
                                                                 covariance_matrix)

        # NOTE: the local names below are part of the contract with the user supplied strings - do not rename.
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, cov)
        portfolio_return = cp.matmul(weights, expected_asset_returns)

        # Optimisation objective and constraints
        # SECURITY: eval() executes arbitrary code - never pass strings from an untrusted source.
        objective, constraints = custom_objective['objective'], custom_objective['constraints']
        allocation_objective = eval(objective)
        allocation_constraints = []
        # Kept as a plain loop: eval() inside a comprehension cannot see this method's locals on older Pythons.
        for constraint in constraints:
            allocation_constraints.append(eval(constraint))

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

        # Calculate the portfolio sharpe ratio
        self.portfolio_sharpe_ratio = ((self.portfolio_return - self.risk_free_rate) / (self.portfolio_risk ** 0.5))

        # Do some post-processing of the weights
        self._post_process_weights()

    def get_portfolio_metrics(self):
        """
        Prints the portfolio metrics - return, risk and Sharpe Ratio.
        """

        print("Portfolio Return = %s" % self.portfolio_return)
        print("Portfolio Risk = %s" % self.portfolio_risk)
        print("Portfolio Sharpe Ratio = %s" % self.portfolio_sharpe_ratio)

    def plot_efficient_frontier(self,
                                covariance,
                                expected_asset_returns,
                                min_return=0,
                                max_return=0.4,
                                risk_free_rate=0.05):
        # pylint: disable=broad-except
        """
        Plot the Markowitz efficient frontier.

        The frontier is traced by solving the 'efficient_risk' allocation for 100 evenly spaced target
        returns between ``min_return`` and ``max_return``; targets for which the allocation fails are
        skipped. Note that this repeatedly calls :meth:`allocate` and therefore overwrites the weights
        and portfolio metrics stored on the instance.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param min_return: (float) Minimum target return.
        :param max_return: (float) Maximum target return.
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        :return: The object returned by ``plt.scatter`` for the frontier points.
        :raises ValueError: If no target return in the range yields a feasible portfolio.
        """

        expected_returns = np.array(expected_asset_returns).reshape((len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            try:
                self.allocate(covariance_matrix=covariance,
                              expected_asset_returns=expected_returns,
                              solution='efficient_risk',
                              target_return=portfolio_return)
            except Exception:
                # Best effort: infeasible targets (or solver failures) simply leave a gap in the frontier
                continue
            # NOTE(review): portfolio_risk is the quadratic form w' * Sigma * w (a variance), yet it is
            # plotted on the axis labelled 'Volatility' - confirm this is intended.
            volatilities.append(self.portfolio_risk)
            returns.append(portfolio_return)
            # The tiny epsilon guards against a division by zero for a zero-risk portfolio
            sharpe_ratios.append((portfolio_return - risk_free_rate) / (self.portfolio_risk ** 0.5 + 1e-16))

        if not volatilities:
            raise ValueError("Could not plot the efficient frontier - no feasible portfolio was found for any "
                             "target return in the given range.")

        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))
        figure = plt.scatter(volatilities, returns, c=sharpe_ratios, cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure

    def _error_checks(self, asset_names, asset_prices, expected_asset_returns, covariance_matrix, solution=None):
        """
        Some initial error checks on the inputs. On success the asset names and the number of assets
        are stored on the instance.

        :param asset_names: (list) A list of strings containing the asset names. If None, the names are taken
                                   from the columns of the price dataframe or of the covariance dataframe.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe,
                               efficient_risk, max_return_min_volatility, max_diversification, efficient_return
                               and max_decorrelation.
        :raises ValueError: If any of the checks fails.
        """

        if asset_prices is None and (expected_asset_returns is None or covariance_matrix is None):
            raise ValueError("You need to supply either raw prices or expected returns "
                             "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if solution is not None and solution not in {"inverse_variance", "min_volatility", "max_sharpe",
                                                     "efficient_risk", "max_return_min_volatility",
                                                     "max_diversification", "efficient_return",
                                                     "max_decorrelation"}:
            raise ValueError("Unknown solution string specified. Supported solutions - "
                             "inverse_variance, min_volatility, max_sharpe, efficient_risk, "
                             "max_return_min_volatility, max_diversification, efficient_return and max_decorrelation")

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif covariance_matrix is not None and isinstance(covariance_matrix, pd.DataFrame):
                asset_names = covariance_matrix.columns
            else:
                raise ValueError("Please provide a list of asset names")
        self.asset_names = asset_names
        self.num_assets = len(asset_names)

    def _calculate_estimators(self, asset_prices, expected_asset_returns, covariance_matrix):
        """
        Calculate the expected returns and covariance matrix of assets in the portfolio.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :return: (np.array, pd.DataFrame) Expected asset returns as a column vector of shape (n, 1) and the
                                          covariance matrix labelled with the asset names.
        :raises ValueError: If the configured expected returns method is not supported.
        """

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices)
            else:
                raise ValueError("Unknown returns specified. Supported returns - mean, exponential")
        expected_asset_returns = np.array(expected_asset_returns).reshape((len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=self.asset_names, columns=self.asset_names)

        return expected_asset_returns, cov

    def _post_process_weights(self):
        """
        A final post-processing of weights produced by the optimisation procedures.

        Negative weights are clipped to 0, a weight numerically equal to 1 absorbs the whole allocation
        and the weights array is converted to a one-row dataframe with the asset names as columns.
        """

        # Set negative weights (e.g. numerical noise such as -4.7e-16) to 0
        self.weights[self.weights < 0] = 0

        # If any of the weights is very close to one, we convert it to 1 and set the other asset weights to 0.
        almost_one_index = np.isclose(self.weights, 1)
        if almost_one_index.any():
            self.weights[almost_one_index] = 1
            self.weights[np.logical_not(almost_one_index)] = 0

        self.weights = pd.DataFrame(self.weights)
        self.weights.index = self.asset_names
        self.weights = self.weights.T

    def _inverse_variance(self, covariance, expected_returns):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        # Each asset is weighted by the reciprocal of its variance, normalised to sum to 1
        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        self.weights = ivp
        self.portfolio_risk = np.dot(self.weights, np.dot(covariance.values, self.weights.T))
        self.portfolio_return = np.dot(self.weights, expected_returns)[0]

    def _min_volatility(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Compute minimum volatility portfolio allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If no optimal set of weights is found.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # User supplied inequality strings are evaluated against the local ``weights`` variable
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_return_min_volatility(self, covariance, expected_returns, risk_aversion):
        # pylint: disable=eval-used
        """
        Calculate maximum return-minimum volatility portfolio allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa.
        :raises ValueError: If no optimal set of weights is found.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk_aversion * risk - portfolio_return)
        allocation_constraints = [
            cp.sum(weights) == 1
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # User supplied inequality strings are evaluated against the local ``weights`` variable
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_sharpe(self, covariance, expected_returns):
        # pylint: disable=invalid-name, eval-used
        """
        Compute maximum Sharpe portfolio allocation.

        The problem is solved in the scaled variable ``y = kappa * weights`` under the normalisation
        ``(mu - risk_free_rate)' * y == 1``; the portfolio weights are recovered as ``y / kappa``.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If no optimal set of weights is found.
        """

        y = cp.Variable(self.num_assets)
        y.value = np.array([1 / self.num_assets] * self.num_assets)
        kappa = cp.Variable(1)
        risk = cp.quad_form(y, covariance)
        weights = y / kappa
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - self.risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa,
            kappa >= 0
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    y >= kappa * self.weight_bounds[0],
                    y <= kappa * self.weight_bounds[1]
                ]
            )
        else:
            # NOTE(review): here ``weights`` is the expression y / kappa, so evaluated inequalities may
            # not be accepted by the solver - confirm string bounds are supported for this solution.
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    y <= kappa,
                    y >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        # ``risk`` is the quadratic form of the scaled variable y, not of the weights; the portfolio
        # risk must be evaluated on the recovered weights (it equals risk / kappa ** 2).
        self.portfolio_risk = np.dot(self.weights, np.dot(np.asarray(covariance), self.weights))
        self.portfolio_return = portfolio_return.value[0]

    def _min_volatility_for_target_return(self, covariance, expected_returns, target_return):
        # pylint: disable=eval-used
        """
        Calculate minimum volatility portfolio for a given target return.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_return: (float) Target return of the portfolio.
        :raises ValueError: If no optimal set of weights is found.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
            portfolio_return >= target_return,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # User supplied inequality strings are evaluated against the local ``weights`` variable
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        # NOTE(review): the target is reported, although the constraint is an inequality and the realised
        # return (portfolio_return.value) may be higher - confirm this is intended.
        self.portfolio_return = target_return

    def _max_return_for_target_risk(self, covariance, expected_returns, target_risk):
        # pylint: disable=eval-used
        """
        Calculate maximum return for a given target volatility/risk.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_risk: (float) Target risk of the portfolio.
        :raises ValueError: If no optimal set of weights is found.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Maximize(portfolio_return)
        allocation_constraints = [
            cp.sum(weights) == 1,
            risk <= target_risk
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # User supplied inequality strings are evaluated against the local ``weights`` variable
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        # NOTE(review): the target is reported, although the constraint is an inequality and the realised
        # risk (risk.value) may be lower - confirm this is intended.
        self.portfolio_risk = target_risk
        self.portfolio_return = portfolio_return.value[0]

    def _max_diversification(self, covariance, expected_returns):
        """
        Calculate the maximum diversified portfolio.

        The maximum decorrelation weights are rescaled by the inverse of each asset's volatility and
        re-normalised to sum to 1.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If no optimal set of weights is found.
        """

        self._max_decorrelation(covariance, expected_returns)

        # Divide weights by individual asset volatilities (square root of the variances on the diagonal)
        asset_volatilities = np.sqrt(np.diag(covariance))
        self.weights = self.weights / asset_volatilities

        # Standardize weights
        self.weights = self.weights / np.sum(self.weights)

        portfolio_return = np.dot(expected_returns.T, self.weights)[0]
        risk = np.dot(self.weights, np.dot(covariance, self.weights.T))

        self.portfolio_risk = risk
        self.portfolio_return = portfolio_return

    def _max_decorrelation(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Calculate the maximum decorrelated portfolio.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :raises ValueError: If no optimal set of weights is found.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)
        corr = self.risk_estimators.cov_to_corr(covariance)
        portfolio_correlation = cp.quad_form(weights, corr)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(portfolio_correlation)
        allocation_constraints = [
            cp.sum(weights) == 1
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend(
                [
                    weights >= self.weight_bounds[0],
                    weights <= min(self.weight_bounds[1], 1)
                ]
            )
        else:
            # User supplied inequality strings are evaluated against the local ``weights`` variable
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend(
                [
                    weights <= 1,
                    weights >= 0
                ]
            )

        # Define and solve the problem
        problem = cp.Problem(
            objective=allocation_objective,
            constraints=allocation_constraints
        )
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]
class CLA: # pylint: disable=too-many-instance-attributes """ This class implements the famous Critical Line Algorithm for mean-variance portfolio optimisation. It is reproduced with modification from the following paper: `D.H. Bailey and M.L. Prado “An Open-Source Implementation of the Critical- Line Algorithm for Portfolio Optimization”,Algorithms, 6 (2013), 169-196. <http://dx.doi.org/10.3390/a6010169>`_. The Critical Line Algorithm is a famous portfolio optimisation algorithm used for calculating the optimal allocation weights for a given portfolio. It solves the optimisation problem with optimisation constraints on each weight - lower and upper bounds on the weight value. This class can compute multiple types of solutions - 1. CLA Turning Points 2. Minimum Variance 3. Maximum Sharpe 4. Efficient Frontier Allocations """ def __init__(self, weight_bounds=(0, 1), calculate_expected_returns="mean"): """ Initialise the storage arrays and some preprocessing. :param weight_bounds: (tuple) a tuple specifying the lower and upper bound ranges for the portfolio weights :param calculate_expected_returns: (str) the method to use for calculation of expected returns. Currently supports "mean" and "exponential" """ self.weight_bounds = weight_bounds self.calculate_expected_returns = calculate_expected_returns self.weights = list() self.lambdas = list() self.gammas = list() self.free_weights = list() self.expected_returns = None self.cov_matrix = None self.lower_bounds = None self.upper_bounds = None self.max_sharpe = None self.min_var = None self.efficient_frontier_means = None self.efficient_frontier_sigma = None self.returns_estimator = ReturnsEstimation() @staticmethod def _infnone(number): """ Converts a Nonetype object to inf. :param number: (int/float/None) a number :return: (float) -inf or number """ return float("-inf") if number is None else number def _init_algo(self): """ Initial setting up of the algorithm. 
Calculates the first free weight of the first turning point. :return: (list, list) asset index and the corresponding free weight value """ # Form structured array structured_array = np.zeros((self.expected_returns.shape[0]), dtype=[("id", int), ("mu", float)]) expected_returns = [ self.expected_returns[i][0] for i in range(self.expected_returns.shape[0]) ] # dump array into list # Fill structured array structured_array[:] = list( zip(list(range(self.expected_returns.shape[0])), expected_returns)) # Sort structured array based on increasing return value expected_returns = np.sort(structured_array, order="mu") # First free weight index, weights = expected_returns.shape[0], np.copy(self.lower_bounds) while np.sum(weights) < 1: index -= 1 # Set weights one by one to the upper bounds weights[expected_returns[index][0]] = self.upper_bounds[ expected_returns[index][0]] weights[expected_returns[index][0]] += 1 - np.sum(weights) return [expected_returns[index][0]], weights @staticmethod def _compute_bi(c_final, asset_bounds_i): """ Calculates which bound value to assign to a bounded asset - lower bound or upper bound. :param c_final: (float) a value calculated using the covariance matrices of free weights. Refer to https://pdfs.semanticscholar.org/4fb1/2c1129ba5389bafe47b03e595d098d0252b9.pdf for more information. :param asset_bounds_i: (list) a list containing the lower and upper bound values for the ith weight :return: bounded weight value """ if c_final > 0: return asset_bounds_i[1][0] return asset_bounds_i[0][0] def _compute_w(self, covar_f_inv, covar_fb, mean_f, w_b): """ Compute the turning point associated with the current set of free weights F. 
:param covar_f_inv: (np.array) inverse of covariance matrix of free assets :param covar_fb: (np.array) covariance matrix between free assets and bounded assets :param mean_f: (np.array) expected returns of free assets :param w_b: (np.array) bounded asset weight values :return: (array, float) list of turning point weights and gamma value from the langrange equation """ # Compute gamma ones_f = np.ones(mean_f.shape) g_1 = np.dot(np.dot(ones_f.T, covar_f_inv), mean_f) g_2 = np.dot(np.dot(ones_f.T, covar_f_inv), ones_f) if w_b is None: g_final, w_1 = float(-self.lambdas[-1] * g_1 / g_2 + 1 / g_2), 0 else: ones_b = np.ones(w_b.shape) g_3 = np.dot(ones_b.T, w_b) g_4 = np.dot(covar_f_inv, covar_fb) w_1 = np.dot(g_4, w_b) g_4 = np.dot(ones_f.T, w_1) g_final = float(-self.lambdas[-1] * g_1 / g_2 + (1 - g_3 + g_4) / g_2) # Compute weights w_2 = np.dot(covar_f_inv, ones_f) w_3 = np.dot(covar_f_inv, mean_f) free_asset_weights = -1 * w_1 + g_final * w_2 + self.lambdas[-1] * w_3 return free_asset_weights, g_final def _compute_lambda(self, covar_f_inv, covar_fb, mean_f, w_b, asset_index, b_i): """ Calculate the lambda value in the langrange optimsation equation. 
:param covar_f_inv: (np.array) inverse of covariance matrix of free assets :param covar_fb: (np.array) covariance matrix between free assets and bounded assets :param mean_f: (np.array) expected returns of free assets :param w_b: (np.array) bounded asset weight values :param asset_index: (int) index of the asset in the portfolio :param b_i: (list) list of upper and lower bounded weight values :return: (float) lambda value """ # Compute C ones_f = np.ones(mean_f.shape) c_1 = np.dot(np.dot(ones_f.T, covar_f_inv), ones_f) c_2 = np.dot(covar_f_inv, mean_f) c_3 = np.dot(np.dot(ones_f.T, covar_f_inv), mean_f) c_4 = np.dot(covar_f_inv, ones_f) c_final = -1 * c_1 * c_2[asset_index] + c_3 * c_4[asset_index] if c_final == 0: return None, None # Compute bi if isinstance(b_i, list): b_i = self._compute_bi(c_final, b_i) # Compute Lambda if w_b is None: # All free assets return float((c_4[asset_index] - c_1 * b_i) / c_final), b_i ones_b = np.ones(w_b.shape) l_1 = np.dot(ones_b.T, w_b) l_2 = np.dot(covar_f_inv, covar_fb) l_3 = np.dot(l_2, w_b) l_2 = np.dot(ones_f.T, l_3) lambda_value = float(((1 - l_1 + l_2) * c_4[asset_index] - c_1 * (b_i + l_3[asset_index])) / c_final) return lambda_value, b_i def _get_matrices(self, free_weights): """ Calculate the required matrices between free and bounded assets. :param free_weights: (list) list of free assets/weights :return: (tuple of np.array matrices) the corresponding matrices """ covar_f = self._reduce_matrix(self.cov_matrix, free_weights, free_weights) mean_f = self._reduce_matrix(self.expected_returns, free_weights, [0]) bounded_weights = self._get_bounded_weights(free_weights) covar_fb = self._reduce_matrix(self.cov_matrix, free_weights, bounded_weights) w_b = self._reduce_matrix(self.weights[-1], bounded_weights, [0]) return covar_f, covar_fb, mean_f, w_b def _get_bounded_weights(self, free_weights): """ Compute the list of bounded assets. 
:param free_weights: (np.array) list of free weights/assets :return: (np.array) list of bounded assets/weights """ return self._diff_lists(list(range(self.expected_returns.shape[0])), free_weights) @staticmethod def _diff_lists(list_1, list_2): """ Calculate the set difference between two lists. :param list_1: (list) a list of asset indices :param list_2: (list) another list of asset indices :return: (list) set difference between the two input lists """ return list(set(list_1) - set(list_2)) @staticmethod def _reduce_matrix(matrix, row_indices, col_indices): """ Reduce a matrix to the provided set of rows and columns. :param matrix: (np.array) a matrix whose subset of rows and columns we need :param row_indices: (list) list of row indices for the matrix :param col_indices: (list) list of column indices for the matrix :return: (np.array) subset of input matrix """ return matrix[np.ix_(row_indices, col_indices)] def _purge_num_err(self, tol): """ Purge violations of inequality constraints (associated with ill-conditioned cov matrix). :param tol: (float) tolerance level for purging """ index_1 = 0 while True: flag = False if index_1 == len(self.weights): break if abs(sum(self.weights[index_1]) - 1) > tol: flag = True else: for index_2 in range(len(self.weights[index_1])): if (self.weights[index_1][index_2] - self.lower_bounds[index_2] < -tol or self.weights[index_1][index_2] - self.upper_bounds[index_2] > tol): flag = True break if flag is True: del self.weights[index_1] del self.lambdas[index_1] del self.gammas[index_1] del self.free_weights[index_1] else: index_1 += 1 def _purge_excess(self): """ Remove violations of the convex hull. 
""" index_1, repeat = 0, False while True: if repeat is False: index_1 += 1 if index_1 >= len(self.weights) - 1: break weights = self.weights[index_1] mean = np.dot(weights.T, self.expected_returns)[0, 0] index_2, repeat = index_1 + 1, False while True: if index_2 == len(self.weights): break weights = self.weights[index_2] mean_ = np.dot(weights.T, self.expected_returns)[0, 0] if mean < mean_: del self.weights[index_1] del self.lambdas[index_1] del self.gammas[index_1] del self.free_weights[index_1] repeat = True break index_2 += 1 @staticmethod def _golden_section(obj, left, right, **kwargs): """ Golden section method. Maximum if kargs['minimum']==False is passed. :param obj: (function) The objective function on which the extreme will be found. :param left: (float) The leftmost extreme of search :param right: (float) The rightmost extreme of search """ tol, sign, args = 1.0e-9, -1, None args = kwargs.get("args", None) num_iterations = int(ceil(-2.078087 * log(tol / abs(right - left)))) gs_ratio = 0.618033989 complementary_gs_ratio = 1.0 - gs_ratio # Initialize x_1 = gs_ratio * left + complementary_gs_ratio * right x_2 = complementary_gs_ratio * left + gs_ratio * right f_1 = sign * obj(x_1, *args) f_2 = sign * obj(x_2, *args) # Loop for _ in range(num_iterations): if f_1 > f_2: left = x_1 x_1 = x_2 f_1 = f_2 x_2 = complementary_gs_ratio * left + gs_ratio * right f_2 = sign * obj(x_2, *args) else: right = x_2 x_2 = x_1 f_2 = f_1 x_1 = gs_ratio * left + complementary_gs_ratio * right f_1 = sign * obj(x_1, *args) if f_1 < f_2: return x_1, sign * f_1 return x_2, sign * f_2 def _eval_sr(self, alpha, w_0, w_1): """ Evaluate the sharpe ratio of the portfolio within the convex combination. 
:param alpha: (float) convex combination value :param w_0: (list) first endpoint of convex combination of weights :param w_1: (list) second endpoint of convex combination of weights :return: """ weights = alpha * w_0 + (1 - alpha) * w_1 returns = np.dot(weights.T, self.expected_returns)[0, 0] volatility = np.dot(np.dot(weights.T, self.cov_matrix), weights)[0, 0]**0.5 return returns / volatility def _bound_free_weight(self, free_weights): """ Add a free weight to list of bounded weights. :param free_weights: (list) list of free-weight indices :return: (float, int, int) lambda value, index of free weight to be bounded, bound weight value """ lambda_in = None i_in = None bi_in = None if len(free_weights) > 1: covar_f, covar_fb, mean_f, w_b = self._get_matrices(free_weights) covar_f_inv = np.linalg.inv(covar_f) j = 0 for i in free_weights: lambda_i, b_i = self._compute_lambda( covar_f_inv, covar_fb, mean_f, w_b, j, [self.lower_bounds[i], self.upper_bounds[i]]) if self._infnone(lambda_i) > self._infnone(lambda_in): lambda_in, i_in, bi_in = lambda_i, i, b_i j += 1 return lambda_in, i_in, bi_in def _free_bound_weight(self, free_weights): """ Add a bounded weight to list of free weights. 
:param free_weights: (list) list of free-weight indices :return: (float, int) lambda value, index of the bounded weight to be made free """ lambda_out = None i_out = None if len(free_weights) < self.expected_returns.shape[0]: bounded_weight_indices = self._get_bounded_weights(free_weights) for i in bounded_weight_indices: covar_f, covar_fb, mean_f, w_b = self._get_matrices( free_weights + [i]) covar_f_inv = np.linalg.inv(covar_f) lambda_i, _ = self._compute_lambda( covar_f_inv, covar_fb, mean_f, w_b, mean_f.shape[0] - 1, self.weights[-1][i], ) if (self.lambdas[-1] is None or lambda_i < self.lambdas[-1] ) and lambda_i > self._infnone(lambda_out): lambda_out, i_out = lambda_i, i return lambda_out, i_out def _initialise(self, asset_prices, expected_asset_returns, covariance_matrix, resample_by): # pylint: disable=invalid-name, too-many-branches, bad-continuation """ Initialise covariances, upper-counds, lower-bounds and storage buffers. :param asset_prices: (pd.Dataframe) dataframe of asset prices indexed by date :param expected_asset_returns: (list) a list of mean stock returns (mu) :param covariance_matrix: (pd.Dataframe) user supplied dataframe of asset returns indexed by date. Used for calculation of covariance matrix :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to 'B' meaning daily business days which is equivalent to no resampling """ # Calculate the returns if the user does not supply a returns matrix self.expected_returns = expected_asset_returns if expected_asset_returns is None: if self.calculate_expected_returns == "mean": self.expected_returns = self.returns_estimator.calculate_mean_historical_returns( asset_prices=asset_prices, resample_by=resample_by) elif self.calculate_expected_returns == "exponential": self.expected_returns = self.returns_estimator.calculate_exponential_historical_returns( asset_prices=asset_prices, resample_by=resample_by) else: raise ValueError( "Unknown returns specified. 
Supported returns - mean, exponential" ) self.expected_returns = np.array(self.expected_returns).reshape( (len(self.expected_returns), 1)) if (self.expected_returns == np.ones(self.expected_returns.shape) * self.expected_returns.mean()).all(): self.expected_returns[-1, 0] += 1e-5 # Calculate the covariance matrix if covariance_matrix is None: returns = self.returns_estimator.calculate_returns( asset_prices=asset_prices, resample_by=resample_by) covariance_matrix = returns.cov() self.cov_matrix = np.asarray(covariance_matrix) # Intialise lower bounds if isinstance(self.weight_bounds[0], numbers.Real): self.lower_bounds = np.ones( self.expected_returns.shape) * self.weight_bounds[0] else: self.lower_bounds = np.array(self.weight_bounds[0]).reshape( self.expected_returns.shape) # Intialise upper bounds if isinstance(self.weight_bounds[0], numbers.Real): self.upper_bounds = np.ones( self.expected_returns.shape) * self.weight_bounds[1] else: self.upper_bounds = np.array(self.weight_bounds[1]).reshape( self.expected_returns.shape) # Initialise storage buffers self.weights = [] self.lambdas = [] self.gammas = [] self.free_weights = [] def allocate(self, asset_names, asset_prices=None, expected_asset_returns=None, covariance_matrix=None, solution="cla_turning_points", resample_by=None): # pylint: disable=consider-using-enumerate,too-many-locals,too-many-branches,too-many-statements,bad-continuation """ Calculate the portfolio asset allocations using the method specified. :param asset_names: (list) a list of strings containing the asset names :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (adj closed) :param expected_asset_returns: (list) a list of mean stock returns (mu) :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns :param solution: (str) specify the type of solution to compute. 
Options are: cla_turning_points, max_sharpe, min_volatility, efficient_frontier :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to None for no resampling """ # Initial checks if asset_prices is None and (expected_asset_returns is None or covariance_matrix is None): raise ValueError( "Either supply your own asset returns matrix or pass the asset prices as input" ) if asset_prices is not None: if not isinstance(asset_prices, pd.DataFrame): raise ValueError("Asset prices matrix must be a dataframe") if not isinstance(asset_prices.index, pd.DatetimeIndex): raise ValueError( "Asset prices dataframe must be indexed by date.") # Some initial steps before the algorithm runs self._initialise(asset_prices=asset_prices, resample_by=resample_by, expected_asset_returns=expected_asset_returns, covariance_matrix=covariance_matrix) # Compute the turning points, free sets and weights free_weights, weights = self._init_algo() self.weights.append(np.copy(weights)) # store solution self.lambdas.append(None) self.gammas.append(None) self.free_weights.append(free_weights[:]) while True: # 1) Bound one free weight lambda_in, i_in, bi_in = self._bound_free_weight(free_weights) # 2) Free one bounded weight lambda_out, i_out = self._free_bound_weight(free_weights) # 3) Compute minimum variance solution if (lambda_in is None or lambda_in < 0) and (lambda_out is None or lambda_out < 0): self.lambdas.append(0) covar_f, covar_fb, mean_f, w_b = self._get_matrices( free_weights) covar_f_inv = np.linalg.inv(covar_f) mean_f = np.zeros(mean_f.shape) # 4) Decide whether to free a bounded weight or bound a free weight else: if self._infnone(lambda_in) > self._infnone(lambda_out): self.lambdas.append(lambda_in) free_weights.remove(i_in) weights[i_in] = bi_in # set value at the correct boundary else: self.lambdas.append(lambda_out) free_weights.append(i_out) covar_f, covar_fb, mean_f, w_b = self._get_matrices( free_weights) covar_f_inv = 
np.linalg.inv(covar_f) # 5) Compute solution vector w_f, gamma = self._compute_w(covar_f_inv, covar_fb, mean_f, w_b) for i in range(len(free_weights)): weights[free_weights[i]] = w_f[i] self.weights.append(np.copy(weights)) # store solution self.gammas.append(gamma) self.free_weights.append(free_weights[:]) if self.lambdas[-1] == 0: break # 6) Purge turning points self._purge_num_err(10e-10) self._purge_excess() # Compute the specified solution self._compute_solution(assets=asset_names, solution=solution) def _compute_solution(self, assets, solution): """ Compute the desired solution to the portfolio optimisation problem. :param assets: (list) a list of asset names :param solution: (str) specify the type of solution to compute. Options are: cla_turning_points, max_sharpe, min_volatility, efficient_frontier """ if solution == "max_sharpe": self.max_sharpe, self.weights = self._max_sharpe() self.weights = pd.DataFrame(self.weights) self.weights.index = assets self.weights = self.weights.T elif solution == "min_volatility": self.min_var, self.weights = self._min_volatility() self.weights = pd.DataFrame(self.weights) self.weights.index = assets self.weights = self.weights.T elif solution == "efficient_frontier": self.efficient_frontier_means, self.efficient_frontier_sigma, self.weights = self._efficient_frontier( ) weights_copy = self.weights.copy() for i, turning_point in enumerate(weights_copy): self.weights[i] = turning_point.reshape(1, -1)[0] self.weights = pd.DataFrame(self.weights, columns=assets) elif solution == "cla_turning_points": # Reshape the weight matrix weights_copy = self.weights.copy() for i, turning_point in enumerate(weights_copy): self.weights[i] = turning_point.reshape(1, -1)[0] self.weights = pd.DataFrame(self.weights, columns=assets) else: raise ValueError( "Unknown solution string specified. 
Supported solutions - cla_turning_points, " "efficient_frontier, min_volatility, max_sharpe") def _max_sharpe(self): """ Compute the maximum sharpe portfolio allocation. :return: (float, np.array) tuple of max. sharpe value and the set of weight allocations """ # 1) Compute the local max SR portfolio between any two neighbor turning points w_sr, sharpe_ratios = [], [] for i in range(len(self.weights) - 1): w_0 = np.copy(self.weights[i]) w_1 = np.copy(self.weights[i + 1]) kwargs = {"minimum": False, "args": (w_0, w_1)} alpha, sharpe_ratio = self._golden_section(self._eval_sr, 0, 1, **kwargs) w_sr.append(alpha * w_0 + (1 - alpha) * w_1) sharpe_ratios.append(sharpe_ratio) maximum_sharp_ratio = max(sharpe_ratios) weights_with_max_sharpe_ratio = w_sr[sharpe_ratios.index( maximum_sharp_ratio)] return maximum_sharp_ratio, weights_with_max_sharpe_ratio def _min_volatility(self): """ Compute minimum volatility portfolio allocation. :return: (float, np.array) tuple of minimum variance value and the set of weight allocations """ var = [] for weights in self.weights: volatility = np.dot(np.dot(weights.T, self.cov_matrix), weights) var.append(volatility) min_var = min(var) return min_var**.5, self.weights[var.index(min_var)] def _efficient_frontier(self, points=100): # pylint: disable=invalid-name """ Compute the entire efficient frontier solution. 
:param points: (int) number of efficient frontier points to be calculated :return: tuple of mean, variance amd weights of the frontier solutions """ means, sigma, weights = [], [], [] # remove the 1, to avoid duplications partitions = np.linspace(0, 1, points // len(self.weights))[:-1] b = list(range(len(self.weights) - 1)) for i in b: w_0, w_1 = self.weights[i], self.weights[i + 1] if i == b[-1]: # include the 1 in the last iteration partitions = np.linspace(0, 1, points // len(self.weights)) for j in partitions: w = w_1 * j + (1 - j) * w_0 weights.append(np.copy(w)) means.append(np.dot(w.T, self.expected_returns)[0, 0]) sigma.append( np.dot(np.dot(w.T, self.cov_matrix), w)[0, 0]**0.5) return means, sigma, weights
class HierarchicalClusteringAssetAllocation:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended components
    mentioned in the following papers: `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio
    (August 23, 2018). <https://ssrn.com/abstract=3237540>`_; and `Raffinot, Thomas, Hierarchical Clustering Based
    Asset Allocation (May 2017) <https://ssrn.com/abstract=2840729>`_;

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for assigning
    weights, the HERC algorithm proposed by Raffinot, allows investors to use other risk metrics like Expected
    Shortfall, Sharpe Ratio and Conditional Drawdown. Furthermore, it is flexible enough to be easily extended to
    include custom risk measures of our own.
    """

    def __init__(self, calculate_expected_returns='mean', confidence_level=0.05):
        """
        Initialise.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports "mean" and "exponential"
        :param confidence_level: (float) The confidence level (alpha) used for calculating expected shortfall and
                                         conditional drawdown at risk.
        """
        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.cluster_children = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()
        self.calculate_expected_returns = calculate_expected_returns
        self.confidence_level = confidence_level

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 expected_asset_returns=None,
                 allocation_metric='equal_weighting',
                 linkage='ward',
                 optimal_num_clusters=None):
        """
        Calculate asset allocations using the HCAA algorithm.

        The resulting weights are stored in ``self.weights``; nothing is returned.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param allocation_metric: (str) The metric used for calculating weight allocations. Supported strings -
                                        "equal_weighting", "minimum_variance", "minimum_standard_deviation",
                                        "sharpe_ratio", "expected_shortfall", "conditional_drawdown_risk".
        :param linkage: (str) The type of linkage method to use for clustering. Supported strings - "single",
                              "average", "complete" and "ward".
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical clustering. When not given,
                                           it is estimated with the Gap statistic.
        :raises ValueError: if the inputs fail the initial checks, the expected returns method is unknown or
                            the asset names can not be determined.
        """
        # Perform initial checks
        self._perform_checks(asset_prices, asset_returns, expected_asset_returns, allocation_metric)

        # Calculate the expected returns if the user does not supply any returns
        # (only required for sharpe_ratio allocation metric)
        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None:
            if allocation_metric in {'expected_shortfall', 'conditional_drawdown_risk'} or \
                    covariance_matrix is None or not optimal_num_clusters:
                asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self._cov2corr(covariance=cov)

        # Calculate the optimal number of clusters using the Gap statistic
        if not optimal_num_clusters:
            optimal_num_clusters = self._get_optimal_number_of_clusters(
                correlation=corr, linkage=linkage, asset_returns=asset_returns)

        # Tree Clustering
        self.clusters, self.cluster_children = self._tree_clustering(
            correlation=corr, num_clusters=optimal_num_clusters, linkage=linkage)

        # Get the flattened order of assets in hierarchical clustering tree
        # (2 * num_assets - 2 is the id of the last cluster formed, i.e. the root of the tree)
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        self._recursive_bisection(expected_asset_returns=expected_asset_returns,
                                  asset_returns=asset_returns,
                                  covariance_matrix=cov,
                                  assets=asset_names,
                                  allocation_metric=allocation_metric,
                                  optimal_num_clusters=optimal_num_clusters)

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the cluster inertia (within cluster sum-of-squares).

        The value returned is the logarithm of the summed per-cluster mean pairwise distances.

        :param labels: (np.array) Cluster label of every asset (one per column of asset_returns).
        :param asset_returns: (np.array) Historical asset returns, one column per asset.
        :return: (float) Cluster inertia value.
        """
        unique_labels = np.unique(labels)
        inertia = [
            np.mean(pairwise_distances(asset_returns[:, labels == label]))
            for label in unique_labels
        ]
        inertia = np.log(np.sum(inertia))
        return inertia

    def _get_optimal_number_of_clusters(self, correlation, asset_returns, linkage, num_reference_datasets=5):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        At most 10 clusters (or the number of assets, if smaller) are considered.

        :param correlation: (pd.DataFrame/np.array) Matrix of asset correlations.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param linkage: (str) The type of linkage method to use for clustering.
        :param num_reference_datasets: (int) The number of reference datasets to generate for calculating expected
                                             inertia.
        :return: (int) The optimal number of clusters.
        """
        max_number_of_clusters = min(10, asset_returns.shape[1])
        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))

        # The hierarchical tree of the original data does not depend on the number of clusters,
        # so it is built once and only cut at a different level in every iteration
        original_clusters = scipy_linkage(squareform(original_distance_matrix), method=linkage)

        gap_values = []
        for num_clusters in range(1, max_number_of_clusters + 1):
            # Calculate expected inertia from reference datasets
            expected_inertia = self._calculate_expected_inertia(
                num_reference_datasets, asset_returns, num_clusters, linkage)

            # Calculate inertia from original data
            original_cluster_assignments = fcluster(original_clusters, num_clusters, criterion='maxclust')
            inertia = self._compute_cluster_inertia(original_cluster_assignments, asset_returns.values)

            # Calculate the gap statistic
            gap = expected_inertia - inertia
            gap_values.append(gap)

        return int(1 + np.argmax(gap_values))

    def _calculate_expected_inertia(self, num_reference_datasets, asset_returns, num_clusters, linkage):
        """
        Calculate the expected inertia by generating clusters from a uniform distribution.

        :param num_reference_datasets: (int) The number of reference datasets to generate from the distribution.
        :param asset_returns: (pd.DataFrame) Historical asset returns. Only its shape is used.
        :param num_clusters: (int) The number of clusters to generate.
        :param linkage: (str) The type of linkage criterion to use for hierarchical clustering.
        :return: (float) The expected inertia from the reference datasets.
        """
        reference_inertias = []
        for _ in range(num_reference_datasets):
            # Generate reference returns from uniform distribution and calculate the distance matrix.
            reference_asset_returns = pd.DataFrame(np.random.rand(*asset_returns.shape))
            reference_correlation = np.array(reference_asset_returns.corr())
            reference_distance_matrix = np.sqrt(2 * (1 - reference_correlation).round(5))

            reference_clusters = scipy_linkage(squareform(reference_distance_matrix), method=linkage)
            reference_cluster_assignments = fcluster(reference_clusters, num_clusters, criterion='maxclust')
            inertia = self._compute_cluster_inertia(reference_cluster_assignments,
                                                    reference_asset_returns.values)
            reference_inertias.append(inertia)
        return np.mean(reference_inertias)

    @staticmethod
    def _tree_clustering(correlation, num_clusters, linkage):
        """
        Perform agglomerative clustering on the current portfolio.

        :param correlation: (pd.DataFrame) Matrix of asset correlations.
        :param num_clusters: (int) The number of clusters.
        :param linkage: (str) The type of linkage method to use for clustering.
        :return: (np.array, dict) The linkage matrix of the hierarchical tree and a mapping of zero-based
                 cluster id to the list of asset indices belonging to that cluster.
        """
        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        clusters = scipy_linkage(squareform(distance_matrix.values), method=linkage)
        clustering_inds = fcluster(clusters, num_clusters, criterion='maxclust')

        # The flat cluster labels are shifted by one to get zero-based cluster ids
        cluster_children = {
            index - 1: []
            for index in range(min(clustering_inds), max(clustering_inds) + 1)
        }
        for index, cluster_index in enumerate(clustering_inds):
            cluster_children[cluster_index - 1].append(index)
        return clusters, cluster_children

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) The total number of assets.
        :param curr_index: (int) Current index. Indices below num_assets are single assets, the others
                                 refer to rows of the linkage matrix.
        :return: (list) The assets rearranged according to hierarchical clustering.
        """
        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) +
                self._quasi_diagnalization(num_assets, right))

    def _recursive_bisection(self, expected_asset_returns, asset_returns, covariance_matrix, assets,
                             allocation_metric, optimal_num_clusters):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        The result is stored in ``self.weights`` as a single-row dataframe whose columns are the asset names
        arranged in the hierarchical clustering order (``self.ordered_indices``).

        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param assets: (list) List of asset names in the portfolio.
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        """
        num_assets = len(assets)
        self.weights = np.ones(shape=num_assets)
        clusters_contribution = np.ones(shape=optimal_num_clusters)
        clusters_weights = np.ones(shape=optimal_num_clusters)
        clusters_variance = np.ones(shape=optimal_num_clusters)

        # Calculate the corresponding risk measure for the clusters
        self._calculate_risk_contribution_of_clusters(clusters_contribution,
                                                      clusters_variance,
                                                      allocation_metric,
                                                      optimal_num_clusters,
                                                      covariance_matrix,
                                                      expected_asset_returns,
                                                      asset_returns)

        # Recursive bisection taking into account the dendrogram structure
        for cluster_index in range(optimal_num_clusters - 1):

            # Get the left and right cluster ids
            left_cluster_ids, right_cluster_ids = self._get_children_cluster_ids(
                num_assets=num_assets, parent_cluster_id=cluster_index)

            # Compute alpha
            left_cluster_contribution = np.sum(clusters_contribution[left_cluster_ids])
            right_cluster_contribution = np.sum(clusters_contribution[right_cluster_ids])
            if allocation_metric in {'minimum_variance', 'minimum_standard_deviation',
                                     'expected_shortfall', 'conditional_drawdown_risk'}:
                alloc_factor = 1 - left_cluster_contribution / (
                    left_cluster_contribution + right_cluster_contribution)
            elif allocation_metric == 'sharpe_ratio':
                alloc_factor = left_cluster_contribution / (
                    left_cluster_contribution + right_cluster_contribution)

                # If sharp ratio allocation factor is not within limits, then calculate normal cluster
                # variance allocation factor
                if alloc_factor < 0 or alloc_factor > 1:
                    left_cluster_variance = np.sum(clusters_variance[left_cluster_ids])
                    right_cluster_variance = np.sum(clusters_variance[right_cluster_ids])
                    alloc_factor = 1 - left_cluster_variance / (
                        left_cluster_variance + right_cluster_variance)
            else:
                alloc_factor = 0.5  # equal weighting

            # Assign weights to each sub-cluster
            clusters_weights[left_cluster_ids] *= alloc_factor
            clusters_weights[right_cluster_ids] *= 1 - alloc_factor

        # Compute the final weights
        self._calculate_final_portfolio_weights(clusters_weights, covariance_matrix, optimal_num_clusters)

        # Assign actual asset names to weight index. The weights were filled in by original asset
        # position, so they are labelled with the names in their original order first and only then
        # rearranged into the hierarchical clustering order. Labelling them directly with the
        # reordered names would attach the weights to the wrong assets.
        self.weights = pd.DataFrame(self.weights)
        self.weights.index = assets
        self.weights = self.weights.T
        self.weights = self.weights.iloc[:, self.ordered_indices]

    def _calculate_final_portfolio_weights(self, clusters_weights, covariance_matrix, optimal_num_clusters):
        """
        Calculate the final asset weights.

        Inside every cluster the inverse variance weights of its assets are scaled by the weight of the
        cluster. The result is written into ``self.weights`` at the positions of the assets.

        :param clusters_weights: (np.array) The cluster weights calculated using recursive bisection.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        """
        for cluster_index in range(optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]
            cluster_covariance = covariance_matrix.iloc[cluster_asset_indices, cluster_asset_indices]
            ivp_weights = self._get_inverse_variance_weights(cluster_covariance)
            self.weights[cluster_asset_indices] = ivp_weights * clusters_weights[cluster_index]

    def _calculate_risk_contribution_of_clusters(self, clusters_contribution, clusters_variance,
                                                 allocation_metric, optimal_num_clusters,
                                                 covariance_matrix, expected_asset_returns, asset_returns):
        """
        Calculate the risk contribution of clusters based on the allocation metric.

        Both arrays are modified in place. Nothing is written for the "equal_weighting" metric.

        :param clusters_contribution: (np.array) The risk contribution value of the clusters.
        :param clusters_variance: (np.array) The variance of the clusters (only filled for "sharpe_ratio").
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical tree clustering.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        """
        for cluster_index in range(optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]

            if allocation_metric == 'minimum_variance':
                clusters_contribution[cluster_index] = self._get_cluster_variance(
                    covariance_matrix, cluster_asset_indices)
            elif allocation_metric == 'minimum_standard_deviation':
                clusters_contribution[cluster_index] = np.sqrt(
                    self._get_cluster_variance(covariance_matrix, cluster_asset_indices))
            elif allocation_metric == 'sharpe_ratio':
                clusters_contribution[cluster_index] = self._get_cluster_sharpe_ratio(
                    expected_asset_returns, covariance_matrix, cluster_asset_indices)
                # The variance is kept as a fallback for sharpe based allocation factors outside [0, 1]
                clusters_variance[cluster_index] = self._get_cluster_variance(
                    covariance_matrix, cluster_asset_indices)
            elif allocation_metric == 'expected_shortfall':
                clusters_contribution[cluster_index] = self._get_cluster_expected_shortfall(
                    asset_returns=asset_returns,
                    covariance=covariance_matrix,
                    cluster_indices=cluster_asset_indices)
            elif allocation_metric == 'conditional_drawdown_risk':
                clusters_contribution[cluster_index] = self._get_cluster_conditional_drawdown_at_risk(
                    asset_returns=asset_returns,
                    covariance=covariance_matrix,
                    cluster_indices=cluster_asset_indices)

    def _get_children_cluster_ids(self, num_assets, parent_cluster_id):
        """
        Find the left and right children cluster id of the given parent cluster id.

        Parent id 0 refers to the last row of the linkage matrix, id 1 to the row before it and so on.
        A cluster belongs to a side when all of its assets are found below that side of the split.

        :param num_assets: (int) The number of assets in the portfolio.
        :param parent_cluster_id: (int) The current parent cluster id.
        :return: (list, list) List of cluster ids to the left and right of the parent cluster in the hierarchical
                 tree.
        """
        left = int(self.clusters[num_assets - 2 - parent_cluster_id, 0])
        right = int(self.clusters[num_assets - 2 - parent_cluster_id, 1])
        left_cluster = set(self._quasi_diagnalization(num_assets, left))
        right_cluster = set(self._quasi_diagnalization(num_assets, right))

        left_cluster_ids = []
        right_cluster_ids = []
        for id_cluster, cluster in self.cluster_children.items():
            cluster_assets = set(cluster)
            if cluster_assets <= left_cluster:
                left_cluster_ids.append(id_cluster)
            if cluster_assets <= right_cluster:
                right_cluster_ids.append(id_cluster)

        return left_cluster_ids, right_cluster_ids

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :return: (np.array) Inverse variance weight values, normalised to sum to one.
        """
        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        The assets of the cluster are combined with inverse variance weights.

        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Variance of the cluster.
        """
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _get_cluster_sharpe_ratio(self, expected_asset_returns, covariance, cluster_indices):
        """
        Calculate cluster Sharpe Ratio.

        The assets of the cluster are combined with inverse variance weights.

        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Sharpe ratio of the cluster.
        """
        # Convert to a flat array first so that plain lists and pandas objects can be selected by position
        cluster_expected_returns = np.asarray(expected_asset_returns, dtype=float).reshape(-1)[cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        cluster_sharpe_ratio = (parity_w @ cluster_expected_returns) / np.sqrt(cluster_variance)
        return cluster_sharpe_ratio

    def _get_cluster_expected_shortfall(self, asset_returns, covariance, cluster_indices):
        """
        Calculate cluster expected shortfall.

        The assets of the cluster are combined with inverse variance weights.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Expected shortfall of the cluster.
        """
        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_expected_shortfall = self.risk_metrics.calculate_expected_shortfall(
            returns=portfolio_returns, confidence_level=self.confidence_level)
        return cluster_expected_shortfall

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns, covariance, cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        The assets of the cluster are combined with inverse variance weights.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) CDD of the cluster.
        """
        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        portfolio_returns = cluster_asset_returns @ parity_w
        cluster_conditional_drawdown = self.risk_metrics.calculate_conditional_drawdown_risk(
            returns=portfolio_returns, confidence_level=self.confidence_level)
        return cluster_conditional_drawdown

    @staticmethod
    def _intersection(list1, list2):
        """
        Calculate the intersection of two lists.

        The intersection is computed with Python sets, so the order of the result is not guaranteed.

        :param list1: (list) The first list of items.
        :param list2: (list) The second list of items.
        :return: (list) List containing the intersection of the input lists.
        """
        return list(set(list1) & set(list2))

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.DataFrame) Asset returns covariances.
        :return: (pd.DataFrame) Correlations between asset returns.
        """
        # The diagonal matrix is created as float explicitly, otherwise the standard deviations of an
        # integer valued covariance matrix would be truncated when written into it
        d_matrix = np.zeros_like(covariance, dtype=float)
        diagnoal_sqrt = np.sqrt(np.diag(covariance))
        np.fill_diagonal(d_matrix, diagnoal_sqrt)
        d_inv = np.linalg.inv(d_matrix)
        corr = np.dot(np.dot(d_inv, covariance), d_inv)
        corr = pd.DataFrame(corr, index=covariance.columns, columns=covariance.columns)
        return corr

    @staticmethod
    def _perform_checks(asset_prices, asset_returns, expected_asset_returns, allocation_metric):
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param expected_asset_returns: (list) A list of mean asset returns (mu).
        :param allocation_metric: (str) The metric used for calculating weight allocations.
        :raises ValueError: if no input data is supplied, the prices are not a date-indexed dataframe, the
                            allocation metric is unknown or the sharpe ratio metric lacks expected returns.
        """
        if asset_prices is None and asset_returns is None and expected_asset_returns is None:
            raise ValueError(
                "You need to supply either raw prices or returns or expected asset returns."
            )

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if allocation_metric not in \
                {'minimum_variance', 'minimum_standard_deviation', 'sharpe_ratio',
                 'equal_weighting', 'expected_shortfall', 'conditional_drawdown_risk'}:
            raise ValueError(
                "Unknown allocation metric specified. Supported metrics are - minimum_variance, "
                "minimum_standard_deviation, sharpe_ratio, equal_weighting, expected_shortfall, "
                "conditional_drawdown_risk")

        if allocation_metric == 'sharpe_ratio' and expected_asset_returns is None and asset_prices is None:
            raise ValueError(
                "An expected asset returns list is required for sharpe ratio metric. Either provide pre-calculated "
                "expected asset returns or give raw asset prices for inbuilt returns calculation."
            )