def assets_clusters(
    returns,
    correlation="pearson",
    linkage="ward",
    k=None,
    max_k=10,
    leaf_order=True,
):
    r"""
    Create asset classes based on hierarchical clustering.

    Parameters
    ----------
    returns : DataFrame
        Assets returns.
    correlation : str, optional
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value pearson correlation matrix.
        - 'abs_spearman': absolute value spearman correlation matrix.
        - 'distance': distance correlation matrix.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. When given, this value is used instead of the
        optimal number of clusters calculated with the two difference gap
        statistic. The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. Only used when ``k`` is
        None. The default is 10.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal (forwarded as ``optimal_ordering`` to
        the linkage routine). The default is True.

    Returns
    -------
    clusters : DataFrame
        A dataframe with the columns 'Assets' and 'Clusters' (labels of
        the form ``"Cluster <n>"``), sorted by asset name.

    Raises
    ------
    ValueError
        When ``returns`` is not a DataFrame or ``correlation`` is not one
        of the supported values.

    Examples
    --------

    ::

        clusters = cf.assets_clusters(returns, correlation='pearson',
                                      linkage='ward', k=None, max_k=10,
                                      leaf_order=True)

    The clusters dataframe looks like this:

    .. image:: images/clusters_df.png

    """
    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    # Codependence matrix estimated from the returns
    if correlation in {"pearson", "spearman"}:
        corr = returns.corr(method=correlation)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        # Drop the "abs_" prefix to recover the pandas method name
        corr = np.abs(returns.corr(method=correlation[4:]))
    elif correlation == "distance":
        corr = af.dcorr_matrix(returns)
    else:
        # Fail with the documented exception instead of leaving `corr` unbound
        raise ValueError(
            "correlation must be one of 'pearson', 'spearman', 'abs_pearson', "
            "'abs_spearman' or 'distance'; got " + repr(correlation)
        )

    # Hierarchical clustering on the distance sqrt((1 - corr) / 2).
    # NOTE(review): the rounding presumably absorbs floating-point noise so
    # that 1 - corr never goes slightly negative under the square root.
    dist = np.sqrt((1 - corr).round(8) / 2)
    dist = pd.DataFrame(dist, columns=corr.columns, index=corr.index)
    p_dist = squareform(dist, checks=False)
    clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)

    if k is None:
        # Optimal number of clusters
        k = af.two_diff_gap_stat(corr, dist, clustering, max_k)

    clusters_inds = hr.fcluster(clustering, k, criterion="maxclust")

    # One row per asset; fcluster labels follow the column order of `corr`
    clusters = pd.DataFrame(
        {
            "Assets": corr.columns.tolist(),
            "Clusters": ["Cluster " + str(v) for v in clusters_inds],
        }
    )
    return clusters.sort_values(by=["Assets"])
def optimization(
    self,
    model="HRP",
    correlation="pearson",
    covariance="hist",
    rm="MV",
    rf=0,
    linkage="single",
    k=None,
    max_k=10,
    leaf_order=True,
    d=0.94,
):
    r"""
    This method calculates the optimal portfolio according to the
    optimization model selected by the user.

    Besides returning the weights, the call stores its intermediate
    results on the instance: ``cov``, ``corr``, ``clusters``, ``k`` (HERC
    models only), ``sort_order``, ``asset_order`` and ``corr_sorted``.

    Parameters
    ----------
    model : str can be {'HRP', 'HERC' or 'HERC2'}
        The hierarchical cluster portfolio model used to optimize the
        portfolio. The default is 'HRP'. Possible values are:

        - 'HRP': Hierarchical Risk Parity.
        - 'HERC': Hierarchical Equal Risk Contribution.
        - 'HERC2': HERC but splitting weights equally within clusters.

    correlation : str, optional
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value pearson correlation matrix.
        - 'abs_spearman': absolute value spearman correlation matrix.
        - 'distance': distance correlation matrix.

    covariance : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The method used to estimate the covariance matrix.
        The default is 'hist'.

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'equal': Equally weighted.
        - 'vol': Standard Deviation.
        - 'MV': Variance.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'VaR': Value at Risk.
        - 'CVaR': Conditional Value at Risk.
        - 'EVaR': Entropic Value at Risk.
        - 'WR': Worst Realization (Minimax)
        - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
        - 'ADD': Average Drawdown of uncompounded cumulative returns.
        - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
        - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
        - 'UCI': Ulcer Index of uncompounded cumulative returns.
        - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
        - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
        - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
        - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
        - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
        - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        The default is 0.
    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'single'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. When given, this value is used instead of the
        optimal number of clusters calculated with the two difference gap
        statistic. The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal. The default is True.
    d : scalar
        The smoothing factor of ewma methods.
        The default is 0.94.

    Returns
    -------
    w : DataFrame
        The weights of optimal portfolio, in a single column named
        'weights' indexed in the order of ``self.assetslist``.

    Raises
    ------
    ValueError
        When ``correlation`` or ``model`` is not one of the supported
        values.

    """
    # Covariance matrix of the returns. The caller's smoothing factor is
    # forwarded; it used to be pinned to 0.94 regardless of `d`.
    self.cov = pe.covar_matrix(self.returns, method=covariance, d=d)

    # Codependence matrix used to build the clusters
    if correlation in {"pearson", "spearman"}:
        self.corr = self.returns.corr(method=correlation).astype(float)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        # Drop the "abs_" prefix to recover the pandas method name
        self.corr = np.abs(self.returns.corr(method=correlation[4:])).astype(float)
    elif correlation == "distance":
        self.corr = af.dcorr_matrix(self.returns).astype(float)
    else:
        # Do not continue with a missing or stale `self.corr`
        raise ValueError(
            "correlation must be one of 'pearson', 'spearman', 'abs_pearson', "
            "'abs_spearman' or 'distance'; got " + repr(correlation)
        )

    # Step-1: Tree clustering
    if model == "HRP":
        self.clusters = self._hierarchical_clustering_hrp(
            linkage, leaf_order=leaf_order
        )
    elif model in ["HERC", "HERC2"]:
        self.clusters, self.k = self._hierarchical_clustering_herc(
            linkage, max_k, leaf_order=leaf_order
        )
        if k is not None:
            # A user-supplied number of clusters overrides the estimated one
            self.k = int(k)
    else:
        raise ValueError(
            "model must be one of 'HRP', 'HERC' or 'HERC2'; got " + repr(model)
        )

    # Step-2: Seriation (Quasi-Diagonalization)
    self.sort_order = self._seriation(self.clusters)
    # Build a fresh list rather than slice-assigning through an alias of
    # `self.assetslist`, so the canonical asset list is never reordered in
    # place (NOTE(review): only matters if `assetslist` is a stored list
    # rather than a computed property - confirm against the class).
    assets = list(self.assetslist)
    self.asset_order = [assets[i] for i in self.sort_order]
    self.corr_sorted = self.corr.reindex(
        index=self.asset_order, columns=self.asset_order
    )

    # Step-3: Recursive bisection
    if model == "HRP":
        weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
    else:
        # 'HERC' / 'HERC2' (any other model was rejected in Step-1)
        weights = self._hierarchical_recursive_bisection(
            self.clusters, rm=rm, rf=rf, linkage=linkage, model=model
        )

    # Report the weights in the canonical asset order
    weights = weights.loc[self.assetslist].to_frame()
    weights.columns = ["weights"]

    return weights
def optimization(
    self,
    model="HRP",
    correlation="pearson",
    rm="MV",
    rf=0,
    linkage="single",
    k=None,
    max_k=10,
    leaf_order=True,
):
    r"""
    This method calculates the optimal portfolio according to the
    optimization model selected by the user.

    Besides returning the weights, the call stores its intermediate
    results on the instance: ``cov``, ``corr``, ``clusters``, ``k`` (HERC
    only), ``sort_order``, ``asset_order`` and ``corr_sorted``.

    Parameters
    ----------
    model : str can be {'HRP' or 'HERC'}
        The hierarchical cluster portfolio model used to optimize the
        portfolio. The default is 'HRP'. Possible values are:

        - 'HRP': Hierarchical Risk Parity.
        - 'HERC': Hierarchical Equal Risk Contribution.

    correlation : str, optional
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value pearson correlation matrix.
        - 'abs_spearman': absolute value spearman correlation matrix.
        - 'distance': distance correlation matrix.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'vol': Standard Deviation.
        - 'MV': Variance.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'VaR': Value at Risk.
        - 'CVaR': Conditional Value at Risk.
        - 'EVaR': Entropic Value at Risk.
        - 'WR': Worst Realization (Minimax)
        - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
        - 'ADD': Average Drawdown of uncompounded cumulative returns.
        - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
        - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
        - 'UCI': Ulcer Index of uncompounded cumulative returns.
        - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
        - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
        - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
        - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
        - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
        - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        The default is 0.
    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'single'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. When given, this value is used instead of the
        optimal number of clusters calculated with the two difference gap
        statistic. The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    leaf_order : bool, optional
        Forwarded to the clustering helpers as ``leaf_order``.
        The default is True.

    Returns
    -------
    w : DataFrame
        The weights of optimal portfolio, in a single column named
        'weights' indexed in the order of ``self.assetslist``.

    Raises
    ------
    ValueError
        When ``correlation`` or ``model`` is not one of the supported
        values.

    """
    # Sample covariance matrix of the returns
    self.cov = self.returns.cov()

    # Codependence matrix used to build the clusters
    if correlation in {"pearson", "spearman"}:
        self.corr = self.returns.corr(method=correlation)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        # Drop the "abs_" prefix to recover the pandas method name
        self.corr = np.abs(self.returns.corr(method=correlation[4:]))
    elif correlation == "distance":
        self.corr = af.dcorr_matrix(self.returns)
    else:
        # Do not continue with a missing or stale `self.corr`
        raise ValueError(
            "correlation must be one of 'pearson', 'spearman', 'abs_pearson', "
            "'abs_spearman' or 'distance'; got " + repr(correlation)
        )

    # Step-1: Tree clustering
    if model == "HRP":
        self.clusters = self._hierarchical_clustering_hrp(
            linkage, leaf_order=leaf_order
        )
    elif model == "HERC":
        self.clusters, self.k = self._hierarchical_clustering_herc(
            linkage, max_k, leaf_order=leaf_order
        )
        if k is not None:
            # A user-supplied number of clusters overrides the estimated one
            self.k = int(k)
    else:
        raise ValueError("model must be one of 'HRP' or 'HERC'; got " + repr(model))

    # Step-2: Seriation (Quasi-Diagonalization)
    self.sort_order = self._seriation(self.clusters)
    # Build a fresh list rather than slice-assigning through an alias of
    # `self.assetslist`, so the canonical asset list is never reordered in
    # place (NOTE(review): only matters if `assetslist` is a stored list
    # rather than a computed property - confirm against the class).
    assets = list(self.assetslist)
    self.asset_order = [assets[i] for i in self.sort_order]
    self.corr_sorted = self.corr.reindex(
        index=self.asset_order, columns=self.asset_order
    )

    # Step-3: Recursive bisection
    if model == "HRP":
        weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
    else:
        # 'HERC' (any other model was rejected in Step-1)
        weights = self._hierarchical_recursive_bisection(
            self.clusters, rm=rm, rf=rf, linkage=linkage
        )

    # Report the weights in the canonical asset order
    weights = weights.loc[self.assetslist].to_frame()
    weights.columns = ["weights"]

    return weights