def assets_clusters(returns,
                    correlation="pearson",
                    linkage="ward",
                    k=None,
                    max_k=10,
                    leaf_order=True):
    r"""
    Create asset classes based on hierarchical clustering.

    Parameters
    ----------
    returns : DataFrame
        Assets returns.

    correlation : str, optional
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value pearson correlation matrix.
        - 'abs_spearman': absolute value spearman correlation matrix.
        - 'distance': distance correlation matrix.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. This value is used instead of the optimal number
        of clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. Only used when k is None.
        The default is 10.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    clusters : DataFrame
        A dataframe with the columns 'Assets' and 'Clusters' (labels of the
        form 'Cluster <n>'), sorted by asset name.

    Raises
    ------
    ValueError
        When returns is not a DataFrame or when correlation is not one of
        the supported values.

    Examples
    --------

    ::

        clusters = cf.assets_clusters(returns, correlation='pearson', linkage='ward', k=None, max_k=10, leaf_order=True)


    The clusters dataframe looks like this:

    .. image:: images/clusters_df.png

    """

    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    # Correlation (codependence) matrix used to measure asset similarity
    if correlation in {"pearson", "spearman"}:
        corr = returns.corr(method=correlation)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        # Drop the "abs_" prefix to get the underlying pandas method name
        corr = np.abs(returns.corr(method=correlation[4:]))
    elif correlation == "distance":
        corr = af.dcorr_matrix(returns)
    else:
        # Fail explicitly instead of hitting an unbound `corr` further down
        raise ValueError(
            "correlation must be 'pearson', 'spearman', 'abs_pearson', "
            "'abs_spearman' or 'distance'")

    # Hierarchical clustering
    # Rounding removes tiny negative values caused by floating point noise
    # before taking the square root
    dist = np.sqrt((1 - corr).round(8) / 2)
    dist = pd.DataFrame(dist, columns=corr.columns, index=corr.index)
    p_dist = squareform(dist, checks=False)
    clustering = hr.linkage(p_dist,
                            method=linkage,
                            optimal_ordering=leaf_order)

    if k is None:
        # Optimal number of clusters
        k = af.two_diff_gap_stat(corr, dist, clustering, max_k)

    clusters_inds = hr.fcluster(clustering, k, criterion="maxclust")

    # fcluster returns one label per observation, in the column order of corr
    clusters = pd.DataFrame({
        "Assets": corr.columns.tolist(),
        "Clusters": ["Cluster " + str(v) for v in clusters_inds],
    })
    clusters = clusters.sort_values(by=["Assets"])

    return clusters
Beispiel #2
0
    def optimization(
        self,
        model="HRP",
        correlation="pearson",
        covariance="hist",
        rm="MV",
        rf=0,
        linkage="single",
        k=None,
        max_k=10,
        leaf_order=True,
        d=0.94,
    ):
        r"""
        This method calculates the optimal portfolio according to the
        optimization model selected by the user.

        Besides returning the weights, it stores the intermediate results in
        the attributes ``cov``, ``corr``, ``clusters``, ``k`` (HERC models
        only), ``sort_order``, ``asset_order`` and ``corr_sorted``.

        Parameters
        ----------
        model : str can be {'HRP', 'HERC' or 'HERC2'}
            The hierarchical cluster portfolio model used to optimize the
            portfolio. The default is 'HRP'. Possible values are:

            - 'HRP': Hierarchical Risk Parity.
            - 'HERC': Hierarchical Equal Risk Contribution.
            - 'HERC2': HERC but splitting weights equally within clusters.

        correlation : str, optional
            The correlation matrix used to create the clusters.
            The default is 'pearson'. Possible values are:

            - 'pearson': pearson correlation matrix.
            - 'spearman': spearman correlation matrix.
            - 'abs_pearson': absolute value pearson correlation matrix.
            - 'abs_spearman': absolute value spearman correlation matrix.
            - 'distance': distance correlation matrix.

        covariance : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
            The method used to estimate the covariance matrix:
            The default is 'hist'.

            - 'hist': use historical estimates.
            - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
            - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
            - 'ledoit': use the Ledoit and Wolf Shrinkage method.
            - 'oas': use the Oracle Approximation Shrinkage method.
            - 'shrunk': use the basic Shrunk Covariance method.

        rm : str, optional
            The risk measure used to optimize the portfolio.
            The default is 'MV'. Possible values are:

            - 'equal': Equally weighted.
            - 'vol': Standard Deviation.
            - 'MV': Variance.
            - 'MAD': Mean Absolute Deviation.
            - 'MSV': Semi Standard Deviation.
            - 'FLPM': First Lower Partial Moment (Omega Ratio).
            - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
            - 'VaR': Value at Risk.
            - 'CVaR': Conditional Value at Risk.
            - 'EVaR': Entropic Value at Risk.
            - 'WR': Worst Realization (Minimax)
            - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
            - 'ADD': Average Drawdown of uncompounded cumulative returns.
            - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
            - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
            - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
            - 'UCI': Ulcer Index of uncompounded cumulative returns.
            - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
            - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
            - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
            - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
            - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
            - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

        rf : float, optional
            Risk free rate, must be in the same period of assets returns.
            The default is 0.
        linkage : string, optional
            Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
            The default is 'single'. Possible values are:

            - 'single'.
            - 'complete'.
            - 'average'.
            - 'weighted'.
            - 'centroid'.
            - 'median'.
            - 'ward'.

        k : int, optional
            Number of clusters. This value is used instead of the optimal number
            of clusters calculated with the two difference gap statistic.
            Only used by the 'HERC' and 'HERC2' models. The default is None.
        max_k : int, optional
            Max number of clusters used by the two difference gap statistic
            to find the optimal number of clusters. Only used by the 'HERC'
            and 'HERC2' models. The default is 10.
        leaf_order : bool, optional
            Indicates if the clusters are ordered so that the distance between
            successive leaves is minimal. The default is True.
        d : scalar
            The smoothing factor of ewma methods.
            The default is 0.94.

        Returns
        -------
        w : DataFrame
            The weights of optimal portfolio.

        Raises
        ------
        ValueError
            When model or correlation is not one of the supported values.

        """

        # Validate up front so an unsupported model does not fail later with
        # an unbound `weights` or silently reuse clusters from a previous run
        if model not in ("HRP", "HERC", "HERC2"):
            raise ValueError("model must be 'HRP', 'HERC' or 'HERC2'")

        # Covariance matrix estimate; the smoothing factor chosen by the
        # caller is forwarded instead of a hard-coded value
        self.cov = pe.covar_matrix(self.returns, method=covariance, d=d)

        # Correlation (codependence) matrix used to build the clusters
        if correlation in {"pearson", "spearman"}:
            self.corr = self.returns.corr(method=correlation).astype(float)
        elif correlation in {"abs_pearson", "abs_spearman"}:
            # Drop the "abs_" prefix to get the underlying pandas method name
            self.corr = np.abs(
                self.returns.corr(method=correlation[4:])).astype(float)
        elif correlation == "distance":
            self.corr = af.dcorr_matrix(self.returns).astype(float)
        else:
            raise ValueError(
                "correlation must be 'pearson', 'spearman', 'abs_pearson', "
                "'abs_spearman' or 'distance'")

        # Step-1: Tree clustering
        if model == "HRP":
            self.clusters = self._hierarchical_clustering_hrp(
                linkage, leaf_order=leaf_order)
        else:
            self.clusters, self.k = self._hierarchical_clustering_herc(
                linkage, max_k, leaf_order=leaf_order)
            # A user supplied number of clusters overrides the estimated one
            if k is not None:
                self.k = int(k)

        # Step-2: Seriation (Quasi-Diagonalization)
        self.sort_order = self._seriation(self.clusters)
        # Build a new list so the object returned by self.assetslist is
        # never reordered in place
        assets = self.assetslist
        self.asset_order = [assets[i] for i in self.sort_order]
        self.corr_sorted = self.corr.reindex(index=self.asset_order,
                                             columns=self.asset_order)

        # Step-3: Recursive bisection
        if model == "HRP":
            weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
        else:
            weights = self._hierarchical_recursive_bisection(self.clusters,
                                                             rm=rm,
                                                             rf=rf,
                                                             linkage=linkage,
                                                             model=model)

        weights = weights.loc[self.assetslist].to_frame()
        weights.columns = ["weights"]

        return weights
Beispiel #3
0
    def optimization(
        self,
        model="HRP",
        correlation="pearson",
        rm="MV",
        rf=0,
        linkage="single",
        k=None,
        max_k=10,
        leaf_order=True,
    ):
        r"""
        This method calculates the optimal portfolio according to the
        optimization model selected by the user.

        Besides returning the weights, it stores the intermediate results in
        the attributes ``cov``, ``corr``, ``clusters``, ``k`` (HERC model
        only), ``sort_order``, ``asset_order`` and ``corr_sorted``.

        Parameters
        ----------
        model : str can be {'HRP' or 'HERC'}
            The hierarchical cluster portfolio model used to optimize the
            portfolio. The default is 'HRP'. Possible values are:

            - 'HRP': Hierarchical Risk Parity.
            - 'HERC': Hierarchical Equal Risk Contribution.

        correlation : str, optional
            The correlation matrix used to create the clusters.
            The default is 'pearson'. Possible values are:

            - 'pearson': pearson correlation matrix.
            - 'spearman': spearman correlation matrix.
            - 'abs_pearson': absolute value pearson correlation matrix.
            - 'abs_spearman': absolute value spearman correlation matrix.
            - 'distance': distance correlation matrix.

        rm : str, optional
            The risk measure used to optimize the portfolio.
            The default is 'MV'. Possible values are:

            - 'vol': Standard Deviation.
            - 'MV': Variance.
            - 'MAD': Mean Absolute Deviation.
            - 'MSV': Semi Standard Deviation.
            - 'FLPM': First Lower Partial Moment (Omega Ratio).
            - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
            - 'VaR': Value at Risk.
            - 'CVaR': Conditional Value at Risk.
            - 'EVaR': Entropic Value at Risk.
            - 'WR': Worst Realization (Minimax)
            - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
            - 'ADD': Average Drawdown of uncompounded cumulative returns.
            - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
            - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
            - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
            - 'UCI': Ulcer Index of uncompounded cumulative returns.
            - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
            - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
            - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
            - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
            - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
            - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

        rf : float, optional
            Risk free rate, must be in the same period of assets returns.
            The default is 0.
        linkage : string, optional
            Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
            The default is 'single'. Possible values are:

            - 'single'.
            - 'complete'.
            - 'average'.
            - 'weighted'.
            - 'centroid'.
            - 'median'.
            - 'ward'.

        k : int, optional
            Number of clusters. This value is used instead of the optimal number
            of clusters calculated with the two difference gap statistic.
            Only used by the 'HERC' model. The default is None.
        max_k : int, optional
            Max number of clusters used by the two difference gap statistic
            to find the optimal number of clusters. Only used by the 'HERC'
            model. The default is 10.
        leaf_order : bool, optional
            Indicates if the clusters are ordered so that the distance between
            successive leaves is minimal. The default is True.

        Returns
        -------
        w : DataFrame
            The weights of optimal portfolio.

        Raises
        ------
        ValueError
            When model or correlation is not one of the supported values.

        """

        # Validate up front so an unsupported model does not fail later with
        # an unbound `weights` or silently reuse clusters from a previous run
        if model not in ("HRP", "HERC"):
            raise ValueError("model must be 'HRP' or 'HERC'")

        # Historical covariance matrix of the returns
        self.cov = self.returns.cov()

        # Correlation (codependence) matrix used to build the clusters
        if correlation in {"pearson", "spearman"}:
            self.corr = self.returns.corr(method=correlation)
        elif correlation in {"abs_pearson", "abs_spearman"}:
            # Drop the "abs_" prefix to get the underlying pandas method name
            self.corr = np.abs(self.returns.corr(method=correlation[4:]))
        elif correlation == "distance":
            self.corr = af.dcorr_matrix(self.returns)
        else:
            raise ValueError(
                "correlation must be 'pearson', 'spearman', 'abs_pearson', "
                "'abs_spearman' or 'distance'")

        # Step-1: Tree clustering
        if model == "HRP":
            self.clusters = self._hierarchical_clustering_hrp(
                linkage, leaf_order=leaf_order)
        else:
            self.clusters, self.k = self._hierarchical_clustering_herc(
                linkage, max_k, leaf_order=leaf_order)
            # A user supplied number of clusters overrides the estimated one
            if k is not None:
                self.k = int(k)

        # Step-2: Seriation (Quasi-Diagonalization)
        self.sort_order = self._seriation(self.clusters)
        # Build a new list so the object returned by self.assetslist is
        # never reordered in place
        assets = self.assetslist
        self.asset_order = [assets[i] for i in self.sort_order]
        self.corr_sorted = self.corr.reindex(index=self.asset_order,
                                             columns=self.asset_order)

        # Step-3: Recursive bisection
        if model == "HRP":
            weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
        else:
            weights = self._hierarchical_recursive_bisection(self.clusters,
                                                             rm=rm,
                                                             rf=rf,
                                                             linkage=linkage)

        weights = weights.loc[self.assetslist].to_frame()
        weights.columns = ["weights"]

        return weights