def factors_stats(self, method_mu="hist", method_cov="hist", **kwargs):
    r"""
    Calculate the inputs that will be used by the optimization method when
    we select the input model='FM'.

    Parameters
    ----------
    **kwargs : dict
        All additional parameters of risk_factors function.

    See Also
    --------
    riskfolio.ParamsEstimation.forward_regression
    riskfolio.ParamsEstimation.backward_regression
    riskfolio.ParamsEstimation.loadings_matrix
    riskfolio.ParamsEstimation.risk_factors
    """
    X = self.factors
    Y = self.returns
    mu, cov, returns, nav = pe.risk_factors(
        X, Y, method_mu=method_mu, method_cov=method_cov, **kwargs
    )

    self.mu_fm = mu
    self.cov_fm = cov
    self.returns_fm = returns
    self.nav_fm = nav

    value = af.is_pos_def(self.cov_fm, threshold=1e-8)
    if value == False:
        print("You must convert self.cov_fm to a positive definite matrix")
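# Usage sketch (illustrative, not part of the library). Assuming riskfolio is
# importable as `rp`, `Y` is a DataFrame of asset returns and `X` a DataFrame
# of factor returns:
#
#     import riskfolio as rp
#
#     port = rp.Portfolio(returns=Y)
#     port.factors = X
#     port.factors_stats(method_mu="hist", method_cov="hist")
#     # port.mu_fm, port.cov_fm, port.returns_fm and port.nav_fm are now
#     # populated and ready for optimization(model='FM')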
def _hierarchical_clustering_herc(self, linkage="ward", max_k=10, leaf_order=True):

    # Hierarchical clustering
    dist = np.sqrt((1 - self.corr).round(8) / 2)
    dist = pd.DataFrame(dist, columns=self.corr.columns, index=self.corr.index)
    p_dist = squareform(dist, checks=False)
    clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)

    # Optimal number of clusters
    k = af.two_diff_gap_stat(self.corr, dist, clustering, max_k)

    return clustering, k
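# The transform above maps correlations to metric distances via
# d_ij = sqrt((1 - rho_ij) / 2), so rho = 1 gives d = 0, rho = 0 gives
# d = sqrt(1/2) = 0.7071..., and rho = -1 gives d = 1. A minimal standalone
# check (illustrative only):
#
#     import numpy as np
#     rho = np.array([1.0, 0.0, -1.0])
#     print(np.sqrt((1 - rho) / 2))  # [0.         0.70710678 1.        ]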
def assets_stats(self, method_mu="hist", method_cov="hist", **kwargs):
    r"""
    Calculate the inputs that will be used by the optimization method when
    we select the input model='Classic'.

    Parameters
    ----------
    **kwargs : dict
        All additional parameters of mean_vector and covar_matrix functions.

    See Also
    --------
    riskfolio.ParamsEstimation.mean_vector
    riskfolio.ParamsEstimation.covar_matrix
    """
    self.mu = pe.mean_vector(self.returns, method=method_mu, **kwargs)
    self.cov = pe.covar_matrix(self.returns, method=method_cov, **kwargs)
    value = af.is_pos_def(self.cov, threshold=1e-8)
    if value == False:
        print("You must convert self.cov to a positive definite matrix")
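# Usage sketch (illustrative, not part of the library). Assuming `Y` is a
# DataFrame of asset returns:
#
#     import riskfolio as rp
#
#     port = rp.Portfolio(returns=Y)
#     port.assets_stats(method_mu="hist", method_cov="ledoit")
#     # port.mu and port.cov now feed optimization(model='Classic')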
def optimization(
    self,
    model="HRP",
    correlation="pearson",
    covariance="hist",
    rm="MV",
    rf=0,
    linkage="single",
    k=None,
    max_k=10,
    leaf_order=True,
    d=0.94,
):
    r"""
    This method calculates the optimal portfolio according to the
    optimization model selected by the user.

    Parameters
    ----------
    model : str, can be {'HRP', 'HERC' or 'HERC2'}
        The hierarchical cluster portfolio model used to optimize the
        portfolio. The default is 'HRP'. Possible values are:

        - 'HRP': Hierarchical Risk Parity.
        - 'HERC': Hierarchical Equal Risk Contribution.
        - 'HERC2': HERC but splitting weights equally within clusters.

    correlation : str, can be {'pearson', 'spearman', 'abs_pearson', 'abs_spearman' or 'distance'}
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value of pearson correlation matrix.
        - 'abs_spearman': absolute value of spearman correlation matrix.
        - 'distance': distance correlation matrix.

    covariance : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf shrinkage method.
        - 'oas': use the Oracle Approximating Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'equal': Equally weighted.
        - 'vol': Standard Deviation.
        - 'MV': Variance.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'VaR': Value at Risk.
        - 'CVaR': Conditional Value at Risk.
        - 'EVaR': Entropic Value at Risk.
        - 'WR': Worst Realization (Minimax).
        - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
        - 'ADD': Average Drawdown of uncompounded cumulative returns.
        - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
        - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
        - 'UCI': Ulcer Index of uncompounded cumulative returns.
        - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
        - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
        - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
        - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
        - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
        - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        The default is 0.
    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'single'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. This value overrides the optimal number of
        clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    leaf_order : bool, optional
        Indicates if the leaves are ordered so that the distance between
        successive leaves is minimal. The default is True.
    d : scalar
        The smoothing factor of ewma methods. The default is 0.94.

    Returns
    -------
    w : DataFrame
        The weights of the optimal portfolio.

    """

    # Correlation matrix from covariance matrix
    # Bug fix: pass the d argument through instead of a hardcoded 0.94
    self.cov = pe.covar_matrix(self.returns, method=covariance, d=d)
    if correlation in {"pearson", "spearman"}:
        self.corr = self.returns.corr(method=correlation).astype(float)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        self.corr = np.abs(self.returns.corr(method=correlation[4:])).astype(float)
    elif correlation == "distance":
        self.corr = af.dcorr_matrix(self.returns).astype(float)

    # Step 1: Tree clustering
    if model == "HRP":
        self.clusters = self._hierarchical_clustering_hrp(
            linkage, leaf_order=leaf_order
        )
    elif model in ["HERC", "HERC2"]:
        self.clusters, self.k = self._hierarchical_clustering_herc(
            linkage, max_k, leaf_order=leaf_order
        )
        if k is not None:
            self.k = int(k)

    # Step 2: Seriation (Quasi-Diagonalization)
    self.sort_order = self._seriation(self.clusters)
    asset_order = self.assetslist
    asset_order[:] = [self.assetslist[i] for i in self.sort_order]
    self.asset_order = asset_order
    self.corr_sorted = self.corr.reindex(
        index=self.asset_order, columns=self.asset_order
    )

    # Step 3: Recursive bisection
    if model == "HRP":
        weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
    elif model in ["HERC", "HERC2"]:
        weights = self._hierarchical_recursive_bisection(
            self.clusters, rm=rm, rf=rf, linkage=linkage, model=model
        )

    weights = weights.loc[self.assetslist].to_frame()
    weights.columns = ["weights"]

    return weights
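# Usage sketch (illustrative, not part of the library). Assuming `Y` is a
# DataFrame of asset returns and the class is exposed as rp.HCPortfolio:
#
#     import riskfolio as rp
#
#     port = rp.HCPortfolio(returns=Y)
#     w = port.optimization(model="HERC", correlation="pearson",
#                           covariance="hist", rm="CVaR", rf=0,
#                           linkage="ward", max_k=10, leaf_order=True)
#     print(w)  # one-column DataFrame of weights indexed by asset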
def assets_clusters(returns, correlation="pearson", linkage="ward", k=None, max_k=10, leaf_order=True):
    r"""
    Create asset classes based on hierarchical clustering.

    Parameters
    ----------
    returns : DataFrame
        Assets returns.
    correlation : str, can be {'pearson', 'spearman', 'abs_pearson', 'abs_spearman' or 'distance'}
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value of pearson correlation matrix.
        - 'abs_spearman': absolute value of spearman correlation matrix.
        - 'distance': distance correlation matrix.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. This value overrides the optimal number of
        clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    leaf_order : bool, optional
        Indicates if the leaves are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    clusters : DataFrame
        A dataframe with asset classes based on hierarchical clustering.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    Examples
    --------
    ::

        clusters = cf.assets_clusters(returns,
                                      correlation='pearson',
                                      linkage='ward',
                                      k=None,
                                      max_k=10,
                                      leaf_order=True)

    The clusters dataframe looks like this:

    .. image:: images/clusters_df.png

    """

    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    # Correlation matrix from covariance matrix
    if correlation in {"pearson", "spearman"}:
        corr = returns.corr(method=correlation)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        corr = np.abs(returns.corr(method=correlation[4:]))
    elif correlation == "distance":
        corr = af.dcorr_matrix(returns)

    # Hierarchical clustering
    dist = np.sqrt((1 - corr).round(8) / 2)
    dist = pd.DataFrame(dist, columns=corr.columns, index=corr.index)
    p_dist = squareform(dist, checks=False)
    clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)

    if k is None:
        # Optimal number of clusters
        k = af.two_diff_gap_stat(corr, dist, clustering, max_k)

    # Building the asset classes dataframe
    clusters_inds = hr.fcluster(clustering, k, criterion="maxclust")
    clusters = {"Assets": [], "Clusters": []}

    for i, v in enumerate(clusters_inds):
        clusters["Assets"].append(corr.columns.tolist()[i])
        clusters["Clusters"].append("Cluster " + str(v))

    clusters = pd.DataFrame(clusters)
    clusters = clusters.sort_values(by=["Assets"])

    return clusters
def optimization(
    self, model="Classic", rm="MV", obj="Sharpe", rf=0, l=2, hist=True
):
    r"""
    This method calculates the optimum portfolio according to the
    optimization model selected by the user. The general problem that
    it solves is:

    .. math::
        \begin{align}
        &\underset{w}{\text{optimize}} & & F(w)\\
        &\text{s. t.} & & Aw \geq B\\
        & & & \phi_{i}(w) \leq c_{i}\\
        \end{align}

    Where:

    :math:`F(w)` is the objective function.

    :math:`Aw \geq B` is a set of linear constraints.

    :math:`\phi_{i}(w) \leq c_{i}` are constraints on maximum values of
    several risk measures.

    Parameters
    ----------
    model : str, can be {'Classic', 'BL', 'FM' or 'BL_FM'}
        The model used to optimize the portfolio.
        The default is 'Classic'. Possible values are:

        - 'Classic': use estimates of expected return vector and covariance matrix that depend on historical data.
        - 'BL': use estimates of expected return vector and covariance matrix based on the Black Litterman model.
        - 'FM': use estimates of expected return vector and covariance matrix based on a Risk Factor model specified by the user.
        - 'BL_FM': use estimates of expected return vector and covariance matrix based on the Black Litterman model applied to a Risk Factor model.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'MV': Standard Deviation.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'CVaR': Conditional Value at Risk.
        - 'WR': Worst Realization (Minimax).
        - 'MDD': Maximum Drawdown of uncompounded returns (Calmar Ratio).
        - 'ADD': Average Drawdown of uncompounded returns.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded returns.

    obj : str, can be {'MinRisk', 'Utility', 'Sharpe' or 'MaxRet'}
        Objective function of the optimization model.
        The default is 'Sharpe'. Possible values are:

        - 'MinRisk': Minimize the selected risk measure.
        - 'Utility': Maximize the Utility function :math:`\mu w - l \phi_{i}(w)`.
        - 'Sharpe': Maximize the risk adjusted return ratio based on the selected risk measure.
        - 'MaxRet': Maximize the expected return of the portfolio.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        The default is 0.
    l : scalar, optional
        Risk aversion factor of the 'Utility' objective function.
        The default is 2.
    hist : bool, optional
        Indicates if the model uses historical or factor estimation of
        returns to calculate risk measures that depend on scenarios (all
        except the 'MV' risk measure). The default is True.

    Returns
    -------
    w : DataFrame
        The weights of the optimum portfolio.

    """

    # General model variables
    mu = None
    sigma = None
    returns = None
    if model == "Classic":
        mu = np.matrix(self.mu)
        sigma = np.matrix(self.cov)
        returns = np.matrix(self.returns)
        nav = np.matrix(self.nav)
    elif model == "FM":
        mu = np.matrix(self.mu_fm)
        if hist == False:
            sigma = np.matrix(self.cov_fm)
            returns = np.matrix(self.returns_fm)
            nav = np.matrix(self.nav_fm)
        elif hist == True:
            sigma = np.matrix(self.cov)
            returns = np.matrix(self.returns)
            nav = np.matrix(self.nav)
    elif model == "BL":
        mu = np.matrix(self.mu_bl)
        if hist == False:
            sigma = np.matrix(self.cov_bl)
        elif hist == True:
            sigma = np.matrix(self.cov)
        returns = np.matrix(self.returns)
        nav = np.matrix(self.nav)
    elif model == "BL_FM":
        mu = np.matrix(self.mu_bl_fm)
        if hist == False:
            sigma = np.matrix(self.cov_bl_fm)
            returns = np.matrix(self.returns_fm)
            nav = np.matrix(self.nav_fm)
        elif hist == True:
            sigma = np.matrix(self.cov)
            returns = np.matrix(self.returns)
            nav = np.matrix(self.nav)

    # General model variables
    returns = np.matrix(returns)
    w = cv.Variable((mu.shape[1], 1))
    k = cv.Variable((1, 1))
    rf0 = cv.Parameter(nonneg=True)
    rf0.value = rf
    n = cv.Parameter(nonneg=True)
    n.value = returns.shape[0]
    ret = mu * w

    # MV model variables
    risk1 = cv.quad_form(w, sigma)
    returns_1 = af.cov_returns(sigma) * 1000
    n1 = cv.Parameter(nonneg=True)
    n1.value = returns_1.shape[0]
    risk1_1 = cv.norm(returns_1 * w, "fro") / cv.sqrt(n1 - 1)

    # MAD model variables
    madmodel = False
    Y = cv.Variable((returns.shape[0], 1))
    u = np.matrix(np.ones((returns.shape[0], 1)) * mu)
    a = returns - u
    risk2 = cv.sum(Y) / n
    # madconstraints=[a*w >= -Y, a*w <= Y, Y >= 0]
    madconstraints = [a * w <= Y, Y >= 0]

    # Semi variance model variables
    risk3 = cv.norm(Y, "fro") / cv.sqrt(n - 1)

    # CVaR model variables
    alpha1 = self.alpha
    VaR = cv.Variable(1)
    alpha = cv.Parameter(nonneg=True)
    alpha.value = alpha1
    X = returns * w
    Z = cv.Variable((returns.shape[0], 1))
    risk4 = VaR + 1 / (alpha * n) * cv.sum(Z)
    cvarconstraints = [Z >= 0, Z >= -X - VaR]

    # Worst Realization (Minimax) model variables
    M = cv.Variable(1)
    risk5 = M
    wrconstraints = [-X <= M]

    # Lower Partial Moment variables
    lpmmodel = False
    lpm = cv.Variable((returns.shape[0], 1))
    lpmconstraints = [lpm >= 0]

    if obj == "Sharpe":
        lpmconstraints += [lpm >= rf0 * k - X]
    else:
        lpmconstraints += [lpm >= rf0 - X]

    # First Lower Partial Moment (Omega) model variables
    risk6 = cv.sum(lpm) / n

    # Second Lower Partial Moment (Sortino) model variables
    risk7 = cv.norm(lpm, "fro") / cv.sqrt(n - 1)

    # Drawdown model variables
    drawdown = False
    if obj == "Sharpe":
        X1 = k + nav * w
    else:
        X1 = 1 + nav * w

    U = cv.Variable((nav.shape[0] + 1, 1))
    ddconstraints = [U[1:] >= X1, U[1:] >= U[:-1]]

    if obj == "Sharpe":
        ddconstraints += [U[1:] >= k, U[0] == k]
    else:
        ddconstraints += [U[1:] >= 1, U[0] == 1]

    # Maximum Drawdown model variables
    MDD = cv.Variable(1)
    risk8 = MDD
    mddconstraints = [MDD >= U[1:] - X1]

    # Average Drawdown model variables
    risk9 = 1 / n * cv.sum(U[1:] - X1)

    # Conditional Drawdown model variables
    CDaR = cv.Variable(1)
    Zd = cv.Variable((nav.shape[0], 1))
    risk10 = CDaR + 1 / (alpha * n) * cv.sum(Zd)
    cdarconstraints = [Zd >= U[1:] - X1 - CDaR, Zd >= 0]

    # Tracking Error model variables
    c = self.benchweights
    if self.kindbench == True:
        bench = np.matrix(returns) * c
    else:
        bench = self.benchindex

    if obj == "Sharpe":
        TE = cv.norm(returns * w - bench * k, "fro") / cv.sqrt(n - 1)
    else:
        TE = cv.norm(returns * w - bench, "fro") / cv.sqrt(n - 1)

    # Problem additional linear constraints
    if obj == "Sharpe":
        constraints = [
            cv.sum(w) == self.upperlng * k,
            k >= 0,
            mu * w - rf0 * k == 1,
        ]
        if self.sht == False:
            constraints += [w <= self.upperlng * k, w * 1000 >= 0]
        elif self.sht == True:
            constraints += [
                w <= self.upperlng * k,
                w >= -self.uppersht * k,
                cv.sum(cv.neg(w)) <= self.uppersht * k,
            ]
    else:
        constraints = [cv.sum(w) == self.upperlng]
        if self.sht == False:
            constraints += [w <= self.upperlng, w * 1000 >= 0]
        elif self.sht == True:
            constraints += [
                w <= self.upperlng,
                w >= -self.uppersht,
                cv.sum(cv.neg(w)) <= self.uppersht,
            ]

    if self.ainequality is not None and self.binequality is not None:
        A = np.matrix(self.ainequality)
        B = np.matrix(self.binequality)
        if obj == "Sharpe":
            constraints += [A * w - B * k >= 0]
        else:
            constraints += [A * w - B >= 0]

    # Turnover constraints
    if obj == "Sharpe":
        if self.allowTO == True:
            constraints += [cv.abs(w - c * k) * 1000 <= self.turnover * k * 1000]
    else:
        if self.allowTO == True:
            constraints += [cv.abs(w - c) * 1000 <= self.turnover * 1000]

    # Tracking error constraints
    if obj == "Sharpe":
        if self.allowTE == True:
            constraints += [TE <= self.TE * k]
    else:
        if self.allowTE == True:
            constraints += [TE <= self.TE]

    # Problem risk constraints
    if self.upperdev is not None:
        if obj == "Sharpe":
            constraints += [risk1_1 <= self.upperdev * k]
        else:
            constraints += [risk1 <= self.upperdev ** 2]

    if self.uppermad is not None:
        if obj == "Sharpe":
            constraints += [risk2 <= self.uppermad * k / 2]
        else:
            constraints += [risk2 <= self.uppermad / 2]
        madmodel = True

    if self.uppersdev is not None:
        if obj == "Sharpe":
            constraints += [risk3 <= self.uppersdev * k]
        else:
            constraints += [risk3 <= self.uppersdev]
        madmodel = True

    if self.upperCVaR is not None:
        if obj == "Sharpe":
            constraints += [risk4 <= self.upperCVaR * k]
        else:
            constraints += [risk4 <= self.upperCVaR]
        constraints += cvarconstraints

    if self.upperwr is not None:
        if obj == "Sharpe":
            constraints += [-X <= self.upperwr * k]
        else:
            constraints += [-X <= self.upperwr]
        constraints += wrconstraints

    if self.upperflpm is not None:
        if obj == "Sharpe":
            constraints += [risk6 <= self.upperflpm * k]
        else:
            constraints += [risk6 <= self.upperflpm]
        lpmmodel = True

    if self.upperslpm is not None:
        if obj == "Sharpe":
            constraints += [risk7 <= self.upperslpm * k]
        else:
            constraints += [risk7 <= self.upperslpm]
        lpmmodel = True

    if self.uppermdd is not None:
        if obj == "Sharpe":
            constraints += [U[1:] - X1 <= self.uppermdd * k]
        else:
            constraints += [U[1:] - X1 <= self.uppermdd]
        constraints += mddconstraints
        drawdown = True

    if self.upperadd is not None:
        if obj == "Sharpe":
            constraints += [risk9 <= self.upperadd * k]
        else:
            constraints += [risk9 <= self.upperadd]
        drawdown = True

    if self.upperCDaR is not None:
        if obj == "Sharpe":
            constraints += [risk10 <= self.upperCDaR * k]
        else:
            constraints += [risk10 <= self.upperCDaR]
        constraints += cdarconstraints
        drawdown = True

    # Defining risk function
    if rm == "MV":
        if model != "Classic":
            risk = risk1_1
        elif model == "Classic":
            risk = risk1
    elif rm == "MAD":
        risk = risk2
        madmodel = True
    elif rm == "MSV":
        risk = risk3
        madmodel = True
    elif rm == "CVaR":
        risk = risk4
        if self.upperCVaR is None:
            constraints += cvarconstraints
    elif rm == "WR":
        risk = risk5
        if self.upperwr is None:
            constraints += wrconstraints
    elif rm == "FLPM":
        risk = risk6
        lpmmodel = True
    elif rm == "SLPM":
        risk = risk7
        lpmmodel = True
    elif rm == "MDD":
        risk = risk8
        drawdown = True
        if self.uppermdd is None:
            constraints += mddconstraints
    elif rm == "ADD":
        risk = risk9
        drawdown = True
    elif rm == "CDaR":
        risk = risk10
        drawdown = True
        if self.upperCDaR is None:
            constraints += cdarconstraints

    if madmodel == True:
        constraints += madconstraints
    if lpmmodel == True:
        constraints += lpmconstraints
    if drawdown == True:
        constraints += ddconstraints

    # Frontier variables
    portafolio = {}
    for i in self.assetslist:
        portafolio.update({i: []})

    # Optimization process

    # Defining solvers
    solvers = [cv.ECOS, cv.SCS, cv.OSQP, cv.CVXOPT, cv.GLPK]

    # Defining objective function
    if obj == "Sharpe":
        # Bug fix: the original compared rm against "Classic", but "Classic"
        # is a model name, not a risk measure, so the branch could never hit.
        if model != "Classic":
            objective = cv.Minimize(risk)
        elif model == "Classic":
            objective = cv.Minimize(risk * 1000)
    elif obj == "MinRisk":
        objective = cv.Minimize(risk)
    elif obj == "Utility":
        objective = cv.Maximize(ret - l * risk)
    elif obj == "MaxRet":
        objective = cv.Maximize(ret)

    try:
        prob = cv.Problem(objective, constraints)
        for solver in solvers:
            try:
                prob.solve(
                    solver=solver, parallel=True, max_iters=2000, abstol=1e-10
                )
            except:
                pass
            if w.value is not None:
                break

        if obj == "Sharpe":
            weights = np.matrix(w.value / k.value).T
        else:
            weights = np.matrix(w.value).T

        if self.sht == False:
            weights = np.abs(weights) / np.sum(np.abs(weights))

        for j in self.assetslist:
            portafolio[j].append(weights[0, self.assetslist.index(j)])

    except:
        pass

    optimum = pd.DataFrame(portafolio, index=["weights"], dtype=np.float64).T

    return optimum
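# Usage sketch (illustrative, not part of the library). Assuming `Y` is a
# DataFrame of asset returns and the estimation step has been run first:
#
#     import riskfolio as rp
#
#     port = rp.Portfolio(returns=Y)
#     port.assets_stats(method_mu="hist", method_cov="hist")
#     w = port.optimization(model="Classic", rm="CVaR", obj="Sharpe",
#                           rf=0, l=2, hist=True)
#     # w is a one-column DataFrame of weights; an empty DataFrame signals
#     # that every solver in the fallback list failed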
def blacklitterman_stats(
    self,
    P,
    Q,
    rf=0,
    w=None,
    delta=None,
    eq=True,
    method_mu="hist",
    method_cov="hist",
    **kwargs
):
    r"""
    Calculate the inputs that will be used by the optimization method when
    we select the input model='BL'.

    Parameters
    ----------
    P : DataFrame of shape (n_views, n_assets)
        Analyst's views matrix, can be relative or absolute.
    Q : DataFrame of shape (n_views, 1)
        Expected returns of analyst's views.
    rf : scalar, optional
        Risk free rate. The default is 0.
    w : DataFrame of shape (n_assets, 1)
        Weights matrix, where n_assets is the number of assets.
        The default is None.
    delta : float, optional
        Risk aversion factor. If None, it is computed from w, rf and the
        covariance matrix. The default is None.
    eq : bool, optional
        Indicates if the function uses equilibrium or historical excess
        returns. The default is True.
    **kwargs : dict
        Other variables related to the mean and covariance estimation.

    See Also
    --------
    riskfolio.ParamsEstimation.black_litterman
    """
    X = self.returns
    if w is None:
        w = self.benchweights

    if delta is None:
        a = np.matrix(self.mu) * np.matrix(w)
        delta = (a - rf) / (np.matrix(w).T * np.matrix(self.cov) * np.matrix(w))
        delta = delta.item()

    mu, cov, w = pe.black_litterman(
        X=X,
        w=w,
        P=P,
        Q=Q,
        delta=delta,
        rf=rf,
        eq=eq,
        method_mu=method_mu,
        method_cov=method_cov,
        **kwargs
    )
    self.mu_bl = mu
    self.cov_bl = cov

    value = af.is_pos_def(self.cov_bl, threshold=1e-8)
    if value == False:
        print("You must convert self.cov_bl to a positive definite matrix")
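# Usage sketch (illustrative, not part of the library). P holds one view per
# row over n_assets columns and Q the expected return of each view; e.g.
# "asset 0 will outperform asset 1 by 2% per period" for a 3-asset portfolio,
# assuming `port` is a Portfolio instance with assets_stats() already run:
#
#     import pandas as pd
#
#     P = pd.DataFrame([[1.0, -1.0, 0.0]])  # relative view: long 0, short 1
#     Q = pd.DataFrame([[0.02]])            # expected return of that view
#     port.blacklitterman_stats(P=P, Q=Q, rf=0, eq=True)
#     # port.mu_bl and port.cov_bl now feed optimization(model='BL')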
def optimization(
    self,
    model="HRP",
    correlation="pearson",
    rm="MV",
    rf=0,
    linkage="single",
    k=None,
    max_k=10,
    leaf_order=True,
):
    r"""
    This method calculates the optimal portfolio according to the
    optimization model selected by the user.

    Parameters
    ----------
    model : str, can be {'HRP' or 'HERC'}
        The hierarchical cluster portfolio model used to optimize the
        portfolio. The default is 'HRP'. Possible values are:

        - 'HRP': Hierarchical Risk Parity.
        - 'HERC': Hierarchical Equal Risk Contribution.

    correlation : str, can be {'pearson', 'spearman', 'abs_pearson', 'abs_spearman' or 'distance'}
        The correlation matrix used to create the clusters.
        The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix.
        - 'spearman': spearman correlation matrix.
        - 'abs_pearson': absolute value of pearson correlation matrix.
        - 'abs_spearman': absolute value of spearman correlation matrix.
        - 'distance': distance correlation matrix.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'vol': Standard Deviation.
        - 'MV': Variance.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'VaR': Value at Risk.
        - 'CVaR': Conditional Value at Risk.
        - 'EVaR': Entropic Value at Risk.
        - 'WR': Worst Realization (Minimax).
        - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
        - 'ADD': Average Drawdown of uncompounded cumulative returns.
        - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
        - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
        - 'UCI': Ulcer Index of uncompounded cumulative returns.
        - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
        - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
        - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
        - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
        - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
        - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        The default is 0.
    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'single'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.

    k : int, optional
        Number of clusters. This value overrides the optimal number of
        clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    leaf_order : bool, optional
        Indicates if the leaves are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    w : DataFrame
        The weights of the optimal portfolio.

    """

    # Correlation matrix from covariance matrix
    self.cov = self.returns.cov()
    if correlation in {"pearson", "spearman"}:
        self.corr = self.returns.corr(method=correlation)
    elif correlation in {"abs_pearson", "abs_spearman"}:
        self.corr = np.abs(self.returns.corr(method=correlation[4:]))
    elif correlation == "distance":
        self.corr = af.dcorr_matrix(self.returns)

    # Step 1: Tree clustering
    if model == "HRP":
        self.clusters = self._hierarchical_clustering_hrp(
            linkage, leaf_order=leaf_order
        )
    elif model == "HERC":
        self.clusters, self.k = self._hierarchical_clustering_herc(
            linkage, max_k, leaf_order=leaf_order
        )
        if k is not None:
            self.k = int(k)

    # Step 2: Seriation (Quasi-Diagonalization)
    self.sort_order = self._seriation(self.clusters)
    asset_order = self.assetslist
    asset_order[:] = [self.assetslist[i] for i in self.sort_order]
    self.asset_order = asset_order
    self.corr_sorted = self.corr.reindex(
        index=self.asset_order, columns=self.asset_order
    )

    # Step 3: Recursive bisection
    if model == "HRP":
        weights = self._recursive_bisection(self.sort_order, rm=rm, rf=rf)
    elif model == "HERC":
        weights = self._hierarchical_recursive_bisection(
            self.clusters, rm=rm, rf=rf, linkage=linkage
        )

    weights = weights.loc[self.assetslist].to_frame()
    weights.columns = ["weights"]

    return weights
def bootstrapping(X, kind="stationary", q=0.05, n_sim=3000, window=3, seed=0):
    r"""
    Estimates the uncertainty sets of the mean vector and covariance matrix
    through the selected bootstrapping method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Features matrix, where n_samples is the number of samples and
        n_features is the number of features.
    kind : str
        The bootstrapping method. The default value is 'stationary'.
        Possible values are:

        - 'stationary': stationary bootstrapping method, see `StationaryBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.StationaryBootstrap.html#arch.bootstrap.StationaryBootstrap>`_ for more details.
        - 'circular': circular bootstrapping method, see `CircularBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.CircularBlockBootstrap.html#arch.bootstrap.CircularBlockBootstrap>`_ for more details.
        - 'moving': moving bootstrapping method, see `MovingBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.MovingBlockBootstrap.html#arch.bootstrap.MovingBlockBootstrap>`_ for more details.

    q : scalar
        Significance level of the selected bootstrapping method.
        The default is 0.05.
    n_sim : scalar
        Number of simulations of the bootstrapping method.
        The default is 3000.
    window : int
        Block size of the bootstrapping method. Must be greater than 1 and
        lower than n_samples - n_features + 1. The default is 3.
    seed : int
        Seed used to generate random numbers for the bootstrapping method.
        The default is 0.

    Returns
    -------
    mu_l : DataFrame
        The q/2 percentile of the mean vector obtained through the selected
        bootstrapping method.
    mu_u : DataFrame
        The 1-q/2 percentile of the mean vector obtained through the
        selected bootstrapping method.
    cov_l : DataFrame
        The q/2 percentile of the covariance matrix obtained through the
        selected bootstrapping method.
    cov_u : DataFrame
        The 1-q/2 percentile of the covariance matrix obtained through the
        selected bootstrapping method.
    cov_mu : DataFrame
        The covariance matrix of estimation errors of the mean vector
        obtained through the selected bootstrapping method. We take the
        diagonal of this matrix following :cite:`b-fabozzi2007robust`.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if window >= X.shape[0] - window + 1:
        raise ValueError("window must be lower than n_samples - window + 1")
    elif window <= 1:
        raise ValueError("window must be greater than 1")

    rs = np.random.RandomState(seed)

    cols = X.columns.tolist()
    m = len(cols)
    mus = np.zeros((n_sim, 1, m))
    covs = np.zeros((n_sim, m, m))

    if kind == "stationary":
        gen = bs.StationaryBootstrap(window, X, random_state=rs)
    elif kind == "circular":
        gen = bs.CircularBlockBootstrap(window, X, random_state=rs)
    elif kind == "moving":
        gen = bs.MovingBlockBootstrap(window, X, random_state=rs)
    else:
        raise ValueError("kind only can be 'stationary', 'circular' or 'moving'")

    i = 0
    for data in gen.bootstrap(n_sim):
        A = data[0][0]
        mus[i] = A.mean().to_numpy().reshape(1, m)
        covs[i] = A.cov().to_numpy()
        i += 1

    mu_l = np.percentile(mus, q / 2 * 100, axis=0, keepdims=True).reshape(1, m)
    mu_u = np.percentile(mus, 100 - q / 2 * 100, axis=0, keepdims=True).reshape(1, m)

    cov_l = np.percentile(covs, q / 2 * 100, axis=0, keepdims=True).reshape(m, m)
    cov_u = np.percentile(covs, 100 - q / 2 * 100, axis=0, keepdims=True).reshape(m, m)

    cov_mu = mus.reshape(n_sim, m) - X.mean().to_numpy().reshape(1, m)
    cov_mu = np.cov(cov_mu.T)

    mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
    mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)

    cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
    cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)

    cov_mu = np.diag(np.diag(cov_mu))
    cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)

    if au.is_pos_def(cov_l) == False:
        cov_l = au.cov_fix(cov_l, method="clipped", threshold=1e-3)

    if au.is_pos_def(cov_u) == False:
        cov_u = au.cov_fix(cov_u, method="clipped", threshold=1e-3)

    return mu_l, mu_u, cov_l, cov_u, cov_mu
def rp_optimization(self, model="Classic", rm="MV", rf=0, b=None, hist=True):
    r"""
    This method calculates the risk parity portfolio according to the
    optimization model selected by the user. The general problem that
    it solves is:

    .. math::
        \begin{align}
        &\underset{w}{\min} & & R(w)\\
        &\text{s.t.} & & b \log(w) \geq c\\
        & & & w \geq 0 \\
        \end{align}

    Where:

    :math:`R(w)` is the risk measure.

    :math:`b` is a vector of risk constraints.

    Parameters
    ----------
    model : str, can be {'Classic' or 'FM'}
        The model used to optimize the portfolio.
        The default is 'Classic'. Possible values are:

        - 'Classic': use estimates of expected return vector and covariance matrix that depend on historical data.
        - 'FM': use estimates of expected return vector and covariance matrix based on a Risk Factor model specified by the user.

    rm : str, optional
        The risk measure used to optimize the portfolio.
        The default is 'MV'. Possible values are:

        - 'MV': Standard Deviation.
        - 'MAD': Mean Absolute Deviation.
        - 'MSV': Semi Standard Deviation.
        - 'FLPM': First Lower Partial Moment (Omega Ratio).
        - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
        - 'CVaR': Conditional Value at Risk.
        - 'CDaR': Conditional Drawdown at Risk of uncompounded returns.

    rf : float, optional
        Risk free rate, must be in the same period of assets returns.
        Used for 'FLPM' and 'SLPM'. The default is 0.
    b : array, optional
        The vector of risk constraints per asset. The default is 1/n
        (where n is the number of assets).
    hist : bool, optional
        Indicates if the model uses historical or factor estimation of
        returns to calculate risk measures that depend on scenarios (all
        except the 'MV' risk measure). The default is True.

    Returns
    -------
    w : DataFrame
        The weights of the optimum portfolio.

    """

    # General model variables
    mu = None
    sigma = None
    returns = None
    if model == "Classic":
        mu = np.array(self.mu, ndmin=2)
        sigma = np.array(self.cov, ndmin=2)
        returns = np.array(self.returns, ndmin=2)
        nav = np.array(self.nav, ndmin=2)
    elif model == "FM":
        mu = np.array(self.mu_fm, ndmin=2)
        if hist == False:
            sigma = np.array(self.cov_fm, ndmin=2)
            returns = np.array(self.returns_fm, ndmin=2)
            nav = np.array(self.nav_fm, ndmin=2)
        elif hist == True:
            sigma = np.array(self.cov, ndmin=2)
            returns = np.array(self.returns, ndmin=2)
            nav = np.array(self.nav, ndmin=2)

    # General model variables
    if b is None:
        b = np.ones((1, mu.shape[1]))
        b = b / mu.shape[1]

    returns = np.array(returns, ndmin=2)
    w = cv.Variable((mu.shape[1], 1))
    rf0 = rf
    n = returns.shape[0]

    # MV model variables
    risk1 = cv.quad_form(w, sigma)
    returns_1 = af.cov_returns(sigma) * 1000
    n1 = returns_1.shape[0]
    risk1_1 = cv.norm(returns_1 @ w, "fro") / cv.sqrt(n1 - 1)

    # MAD model variables
    Y = cv.Variable((returns.shape[0], 1))
    u = np.ones((returns.shape[0], 1)) * mu
    a = returns - u
    risk2 = cv.sum(Y) / n
    # madconstraints=[a*w >= -Y, a*w <= Y, Y >= 0]
    madconstraints = [a @ w <= Y, Y >= 0]

    # Semi variance model variables
    risk3 = cv.norm(Y, "fro") / cv.sqrt(n - 1)

    # CVaR model variables
    alpha1 = self.alpha
    VaR = cv.Variable((1, 1))
    alpha = alpha1
    X = returns @ w
    Z = cv.Variable((returns.shape[0], 1))
    risk4 = VaR + 1 / (alpha * n) * cv.sum(Z)
    cvarconstraints = [Z >= 0, Z >= -X - VaR]

    # Lower Partial Moment variables
    lpm = cv.Variable((returns.shape[0], 1))
    lpmconstraints = [lpm >= 0, lpm >= rf0 - X]

    # First Lower Partial Moment (Omega) model variables
    risk6 = cv.sum(lpm) / n

    # Second Lower Partial Moment (Sortino) model variables
    risk7 = cv.norm(lpm, "fro") / cv.sqrt(n - 1)

    # Drawdown model variables
    X1 = 1 + nav @ w
    U = cv.Variable((nav.shape[0] + 1, 1))
    ddconstraints = [
        U[1:] * 1000 >= X1 * 1000,
        U[1:] * 1000 >= U[:-1] * 1000,
        U[1:] * 1000 >= 1 * 1000,
        U[0] * 1000 == 1 * 1000,
    ]

    # Conditional Drawdown model variables
    CDaR = cv.Variable((1, 1))
    Zd = cv.Variable((nav.shape[0], 1))
    risk10 = CDaR + 1 / (alpha * n) * cv.sum(Zd)
    cdarconstraints = [
        Zd * 1000 >= U[1:] * 1000 - X1 * 1000 - CDaR * 1000,
        Zd * 1000 >= 0,
    ]

    # Defining risk function
    constraints = []

    if rm == "MV":
        if model != "Classic":
            risk = risk1_1
        elif model == "Classic":
            risk = risk1
    elif rm == "MAD":
        risk = risk2
        constraints += madconstraints
    elif rm == "MSV":
        risk = risk3
        constraints += madconstraints
    elif rm == "CVaR":
        risk = risk4
        constraints += cvarconstraints
    elif rm == "FLPM":
        risk = risk6
        constraints += lpmconstraints
    elif rm == "SLPM":
        risk = risk7
        constraints += lpmconstraints
    elif rm == "CDaR":
        risk = risk10
        constraints += ddconstraints
        constraints += cdarconstraints

    # Frontier variables
    portafolio = {}
    for i in self.assetslist:
        portafolio.update({i: []})

    # Optimization process

    # Defining solvers and per-solver parameters
    solvers = [cv.ECOS, cv.SCS, cv.OSQP, cv.CVXOPT]
    sol_params = {
        cv.ECOS: {"max_iters": 2000, "abstol": 1e-10},
        cv.SCS: {"max_iters": 2500, "eps": 1e-10},
        cv.OSQP: {"max_iter": 10000, "eps_abs": 1e-10},
        cv.CVXOPT: {"max_iters": 2000, "abstol": 1e-10},
    }

    # Defining objective function
    objective = cv.Minimize(risk * 1000)
    constraints += [b @ cv.log(w) * 1000 >= 1 * 1000, w * 1000 >= 0]

    try:
        prob = cv.Problem(objective, constraints)
        for solver in solvers:
            try:
                prob.solve(solver=solver, **sol_params[solver])
            except:
                pass
            if w.value is not None:
                break

        weights = np.array(w.value, ndmin=2).T
        weights = np.abs(weights) / np.sum(np.abs(weights))

        for j in self.assetslist:
            portafolio[j].append(weights[0, self.assetslist.index(j)])

    except:
        pass

    rp_optimum = pd.DataFrame(portafolio, index=["weights"], dtype=np.float64).T

    return rp_optimum
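# Usage sketch (illustrative, not part of the library). Assuming `port` is a
# Portfolio instance whose assets_stats() has already been called:
#
#     import numpy as np
#
#     n = len(port.assetslist)
#     b = np.ones((1, n)) / n  # equal risk contribution target, the default
#     w_rp = port.rp_optimization(model="Classic", rm="MV", rf=0, b=b,
#                                 hist=True)
#     # at the optimum, asset i should contribute roughly b_i of total risk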