Example #1
    def test_fit_kde(self):
        """
        Test the kernel fitting to a series of observations.
        """

        risk_estimators = RiskEstimators()

        # Values to fit kernel to and evaluation points
        observations = np.array([0.1, 0.2, 0.2, 0.3, 0.3, 0.3, 0.4, 0.4, 0.5])
        eval_points = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6])

        # Calculating the pdf at 7 chosen points
        pdf_kde = risk_estimators._fit_kde(observations,
                                           eval_points=eval_points,
                                           kde_bwidth=0.25)

        # Testing the values and if the pdf is symmetric
        self.assertEqual(pdf_kde[0.0], pdf_kde[0.6])
        self.assertEqual(pdf_kde[0.1], pdf_kde[0.5])
        self.assertEqual(pdf_kde[0.2], pdf_kde[0.4])
        self.assertAlmostEqual(pdf_kde[0.3], 1.44413, delta=1e-5)

        # Testing the default output, where the pdf is evaluated on the unique values of the observations
        pdf_kde_default = risk_estimators._fit_kde(observations,
                                                   kde_bwidth=0.25)
        self.assertEqual(pdf_kde[0.1], pdf_kde_default[0.1])
        self.assertEqual(pdf_kde_default[0.2], pdf_kde_default[0.4])
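The private _fit_kde helper exercised above is not shown on this page. As a rough sketch of what such a Gaussian kernel fit can look like with scikit-learn (an assumption about the underlying implementation, not the library's actual code):

import numpy as np
from sklearn.neighbors import KernelDensity

observations = np.array([0.1, 0.2, 0.2, 0.3, 0.3, 0.3, 0.4, 0.4, 0.5])
eval_points = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6])

# Fit a Gaussian kernel with the same bandwidth as in the test above
kde = KernelDensity(kernel='gaussian', bandwidth=0.25).fit(observations.reshape(-1, 1))

# score_samples returns log-densities, so exponentiate to recover the pdf
pdf = np.exp(kde.score_samples(eval_points.reshape(-1, 1)))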
Example #2
    def test_empirical_covariance(self):
        """
        Test the calculation of the Maximum likelihood covariance estimator.
        """

        risk_estimators = RiskEstimators()

        # Getting the first three columns of the data to be able to compare the output
        prices_dataframe = self.data.iloc[:, :3]
        returns_dataframe = self.returns.iloc[:, :3]

        # Expected resulting Maximum likelihood covariance estimator
        expected_cov = np.array([[4.6571e-04, 3.4963e-04, -1.6626e-05],
                                 [3.4963e-04, 3.7193e-04, -1.4957e-05],
                                 [-1.6626e-05, -1.4957e-05, 1.9237e-05]])

        # Using the Maximum likelihood covariance estimator on price data
        empirical_cov = risk_estimators.empirical_covariance(prices_dataframe,
                                                             price_data=True)

        # Using the Maximum likelihood covariance estimator on returns data
        empirical_cov_ret = risk_estimators.empirical_covariance(
            returns_dataframe, price_data=False)

        # Testing if the resulting covariance matrix is right
        np.testing.assert_almost_equal(empirical_cov, expected_cov, decimal=6)

        # And if the results for price and returns are the same
        np.testing.assert_almost_equal(empirical_cov,
                                       empirical_cov_ret,
                                       decimal=4)
Example #3
    def test_exponential_covariance(self):
        """
        Test the calculation of the Exponentially-weighted Covariance matrix.
        """

        risk_estimators = RiskEstimators()

        # Getting the first three columns of the data to be able to compare the output
        prices_dataframe = self.data.iloc[:, :3]
        returns_dataframe = self.returns.iloc[:, :3]

        # Expected Exponentially-weighted Covariance matrix
        expected_expon_cov = np.array(
            [[2.824303e-04, 3.215506e-04, -4.171518e-06],
             [3.215506e-04, 4.585646e-04, -1.868617e-05],
             [-4.171518e-06, -1.868617e-05, 8.684991e-06]])

        # Calculating the Exponentially-weighted Covariance matrix on price data with the span of 60
        expon_cov = risk_estimators.exponential_covariance(prices_dataframe,
                                                           price_data=True,
                                                           window_span=60)

        # Calculating the Exponentially-weighted Covariance matrix on returns data with the span of 60
        expon_cov_ret = risk_estimators.exponential_covariance(
            returns_dataframe, price_data=False, window_span=60)

        # Testing if the resulting Exponentially-weighted Covariance matrix is right
        np.testing.assert_almost_equal(expon_cov,
                                       expected_expon_cov,
                                       decimal=6)

        # And if the results for price and returns are the same
        np.testing.assert_almost_equal(np.array(expon_cov),
                                       np.array(expon_cov_ret),
                                       decimal=4)
Example #4
    def test_minimum_covariance_determinant(self):
        """
        Test the calculation of the Minimum Covariance Determinant.
        """

        risk_estimators = RiskEstimators()

        # Getting the first three columns of the data to be able to compare the output
        prices_dataframe = self.data.iloc[:, :3]
        returns_dataframe = self.returns.iloc[:, :3]

        # Expected resulting Minimum Covariance Determinant
        expected_cov = np.array([[1.5110e-04, 1.1322e-04, -5.2053e-06],
                                 [1.1322e-04, 1.4760e-06, -6.6961e-06],
                                 [-5.2053e-06, -6.6961e-06, 1.0874e-05]])

        # Using the Minimum Covariance Determinant algorithm on price data with random seed 0
        min_covar_determ = risk_estimators.minimum_covariance_determinant(
            prices_dataframe, price_data=True, random_state=0)

        # Using the Minimum Covariance Determinant algorithm on return data with random seed 0
        min_covar_determ_ret = risk_estimators.minimum_covariance_determinant(
            returns_dataframe, price_data=False, random_state=0)

        # Testing if the resulting covariance matrix is right
        np.testing.assert_almost_equal(min_covar_determ,
                                       expected_cov,
                                       decimal=4)

        # And if the results for price and returns are the same
        np.testing.assert_almost_equal(min_covar_determ,
                                       min_covar_determ_ret,
                                       decimal=4)
Example #5
    @staticmethod
    def test_denoised_corr_targ_shrink():
        """
        Test the second method of shrinkage of the eigenvalues associated with noise.
        """

        risk_estimators = RiskEstimators()

        # Eigenvalues and eigenvectors to use
        eigenvalues = np.array([[1.3562, 0, 0], [0, 0.9438, 0], [0, 0, 0.7]])
        eigenvectors = np.array(
            [[-3.69048184e-01, -9.29410263e-01, 1.10397126e-16],
             [-6.57192300e-01, 2.60956474e-01, 7.07106781e-01],
             [6.57192300e-01, -2.60956474e-01, 7.07106781e-01]])

        # Expected correlation matrix
        expected_corr = np.array([[1, 0.32892949, -0.32892949],
                                  [0.32892949, 1, -0.58573558],
                                  [-0.32892949, -0.58573558, 1]])

        # Finding the de-noised correlation matrix
        corr_matrix = risk_estimators._denoised_corr_targ_shrink(
            eigenvalues, eigenvectors, 1)

        # Testing if the de-noised correlation matrix is right
        np.testing.assert_almost_equal(corr_matrix, expected_corr, decimal=4)
Example #6
    def test_semi_covariance(self):
        """
        Test the calculation of the Semi-Covariance matrix.
        """

        risk_estimators = RiskEstimators()

        # Getting the first three columns of the data to be able to compare the output
        prices_dataframe = self.data.iloc[:, :3]
        returns_dataframe = self.returns.iloc[:, :3]

        # Expected Semi-Covariance matrix
        expected_semi_cov = np.array(
            [[7.302402e-05, 5.855724e-05, 3.075326e-06],
             [5.855724e-05, 6.285548e-05, 2.788988e-06],
             [3.075326e-06, 2.788988e-06, 3.221170e-06]])

        # Calculating the Semi-Covariance matrix on price data with zero threshold (volatility of negative returns)
        semi_cov = risk_estimators.semi_covariance(prices_dataframe,
                                                   price_data=True,
                                                   threshold_return=0)

        # Calculating the Semi-Covariance matrix on returns data with zero threshold (volatility of negative returns)
        semi_cov_ret = risk_estimators.semi_covariance(returns_dataframe,
                                                       price_data=False,
                                                       threshold_return=0)

        # Testing if the resulting Semi-Covariance matrix is right
        np.testing.assert_almost_equal(semi_cov, expected_semi_cov, decimal=6)

        # And if the results for price and returns are the same
        np.testing.assert_almost_equal(np.array(semi_cov),
                                       np.array(semi_cov_ret),
                                       decimal=4)
Example #7
    @staticmethod
    def test_get_pca():
        """
        Test the calculation of eigenvalues and eigenvectors from a Hermitian matrix.
        """

        risk_estimators = RiskEstimators()

        # Correlation matrix as an input
        corr_matrix = np.array([[1, 0.1, -0.1], [0.1, 1, -0.3],
                                [-0.1, -0.3, 1]])

        # Expected eigenvalues and the first eigenvector
        expected_eigenvalues = np.array([[1.3562, 0, 0], [0, 0.9438, 0],
                                         [0, 0, 0.7]])
        first_eigenvector = np.array(
            [-3.69048184e-01, -9.29410263e-01, 1.10397126e-16])

        # Finding the eigenvalues and eigenvectors
        eigenvalues, eigenvectors = risk_estimators._get_pca(corr_matrix)

        # Testing eigenvalues and the first eigenvector
        np.testing.assert_almost_equal(eigenvalues,
                                       expected_eigenvalues,
                                       decimal=4)
        np.testing.assert_almost_equal(eigenvectors[0],
                                       first_eigenvector,
                                       decimal=5)
Example #8
    def __init__(self):
        self.weights = list()
        self.seriated_distances = None
        self.seriated_correlations = None
        self.ordered_indices = None
        self.clusters = None
        self.returns_estimator = ReturnsEstimators()
        self.risk_metrics = RiskMetrics()
        self.risk_estimator = RiskEstimators()
Example #9
    @staticmethod
    def test_filter_corr_hierarchical():
        """
        Test the filtering of the empirical correlation matrix.
        """

        risk_estimators = RiskEstimators()

        # Correlation matrix to test
        corr = np.array([[1, 0.70573243, 0.03085437, 0.6019651, 0.81214341],
                         [0.70573243, 1, 0.03126594, 0.56559443, 0.88961155],
                         [0.03085437, 0.03126594, 1, 0.01760481, 0.02842086],
                         [0.60196510, 0.56559443, 0.01760481, 1, 0.73827921],
                         [0.81214341, 0.88961155, 0.02842086, 0.73827921, 1]])

        expected_corr_avg = np.array(
            [[1, 0.44618396, 0.44618396, 0.44618396, 0.61711376],
             [0.44618396, 1, 0.29843018, 0.29843018, 0.61711376],
             [0.44618396, 0.29843018, 1, 0.01760481, 0.61711376],
             [0.44618396, 0.29843018, 0.01760481, 1, 0.61711376],
             [0.61711376, 0.61711376, 0.61711376, 0.61711376, 1]])

        expected_corr_single = np.array(
            [[1, 0.03126594, 0.03085437, 0.03085437, 0.03085437],
             [0.03126594, 1, 0.03126594, 0.03126594, 0.03126594],
             [0.03085437, 0.03126594, 1, 0.01760481, 0.02842086],
             [0.03085437, 0.03126594, 0.01760481, 1, 0.02842086],
             [0.03085437, 0.03126594, 0.02842086, 0.02842086, 1]])

        expected_corr_complete = np.array(
            [[1, 0.70573243, 0.70573243, 0.70573243, 0.88961155],
             [0.70573243, 1, 0.56559443, 0.56559443, 0.88961155],
             [0.70573243, 0.56559443, 1, 0.01760481, 0.88961155],
             [0.70573243, 0.56559443, 0.01760481, 1, 0.88961155],
             [0.88961155, 0.88961155, 0.88961155, 0.88961155, 1]])

        methods_list = ['complete', 'single', 'average']
        # Compute all methods with the given correlation matrix
        corr_complete, corr_single, corr_average = [
            risk_estimators.filter_corr_hierarchical(corr, method)
            for method in methods_list
        ]

        # Test plot
        risk_estimators.filter_corr_hierarchical(corr, draw_plot=True)

        # Testing if the filtered matrices are consistent with the expected values.
        np.testing.assert_almost_equal(corr_complete,
                                       expected_corr_complete,
                                       decimal=4)
        np.testing.assert_almost_equal(corr_single,
                                       expected_corr_single,
                                       decimal=4)
        np.testing.assert_almost_equal(corr_average,
                                       expected_corr_avg,
                                       decimal=4)
Example #10
    def __init__(self, confidence_level=0.05):
        """
        Initialise.

        :param confidence_level: (float) The confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk.
        """

        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.cluster_children = None
        self.optimal_num_clusters = None
        self.returns_estimator = ReturnsEstimators()
        self.risk_estimator = RiskEstimators()
        self.risk_metrics = RiskMetrics()
        self.confidence_level = confidence_level
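A hedged usage sketch for this constructor, assuming the full HierarchicalEqualRiskContribution class shown in Example #24 and a price dataframe like the one loaded in Example #21:

herc = HierarchicalEqualRiskContribution(confidence_level=0.05)
herc.allocate(asset_prices=stock_prices,
              risk_measure='expected_shortfall',
              linkage='ward')
print(herc.weights)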
Example #11
    def __init__(self, calculate_expected_returns='mean', risk_free_rate=0.03):
        """
        Constructor.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports: ``mean``, ``exponential``.
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        """

        self.weights = list()
        self.asset_names = None
        self.num_assets = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimators()
        self.risk_estimators = RiskEstimators()
        self.weight_bounds = (0, 1)
        self.risk_free_rate = risk_free_rate
Example #12
    @staticmethod
    def test_detoned():
        """
        Test the de-toning of the correlation matrix.
        """

        risk_estimators = RiskEstimators()

        # Correlation matrix to use
        corr = np.array([[1, 0.1, -0.1], [0.1, 1, -0.3], [-0.1, -0.3, 1]])

        # Expected correlation matrix
        expected_corr = np.array([[1, -0.39391882, 0.39391882],
                                  [-0.39391882, 1, 0.6897809],
                                  [0.39391882, 0.6897809, 1]])

        # Finding the de-toned correlation matrix
        corr_matrix = risk_estimators._detoned_corr(corr)

        # Testing if the de-toned correlation matrix is right
        np.testing.assert_almost_equal(corr_matrix, expected_corr, decimal=4)
Example #13
    def test_pdf_fit(self):
        """
        Test the fit between empirical pdf and the theoretical pdf.
        """

        risk_estimators = RiskEstimators()

        # Values to calculate theoretical and empirical pdfs
        var = 0.6
        eigen_observations = np.array(
            [0.1, 0.2, 0.2, 0.3, 0.3, 0.3, 0.4, 0.4, 0.5])
        tn_relation = 2
        kde_bwidth = 0.4

        # Calculating the SSE
        sse = risk_estimators._pdf_fit(var, eigen_observations,
                                       tn_relation, kde_bwidth)

        # Testing the SSE value
        self.assertAlmostEqual(sse, 50.51326, delta=1e-5)
Example #14
    @staticmethod
    def test_cov_to_corr():
        """
        Test the deriving of the correlation matrix from a covariance matrix.
        """

        risk_estimators = RiskEstimators()

        # Covariance matrix
        cov_matrix = np.array([[0.01, 0.002, -0.001], [0.002, 0.04, -0.006],
                               [-0.001, -0.006, 0.01]])

        # Expected correlation matrix
        expected_matrix = np.array([[1, 0.1, -0.1], [0.1, 1, -0.3],
                                    [-0.1, -0.3, 1]])

        # Finding the correlation matrix
        corr_matrix = risk_estimators.cov_to_corr(cov_matrix)

        # Testing the resulting correlation matrix
        np.testing.assert_almost_equal(corr_matrix, expected_matrix, decimal=5)
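The conversion tested here is the standard one, corr_ij = cov_ij / (std_i * std_j). A minimal numpy sketch that reproduces the expected matrix above:

import numpy as np

cov_matrix = np.array([[0.01, 0.002, -0.001], [0.002, 0.04, -0.006],
                       [-0.001, -0.006, 0.01]])

# Standard deviations are the square roots of the diagonal entries
std = np.sqrt(np.diag(cov_matrix))

# Divide each entry by the product of the row and column deviations
corr_manual = cov_matrix / np.outer(std, std)  # matches expected_matrix above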
Example #15
    def test_find_max_eval(self):
        """
        Test the search for maximum random eigenvalue.
        """

        risk_estimators = RiskEstimators()

        # Values to calculate theoretical and empirical pdfs
        eigen_observations = np.array(
            [0.1, 0.2, 0.2, 0.3, 0.3, 0.3, 0.4, 0.4, 0.5])
        tn_relation = 2
        kde_bwidth = 0.4

        # Optimizing and getting the maximum random eigenvalue and the optimal variance
        maximum_eigen, var = risk_estimators._find_max_eval(
            eigen_observations, tn_relation, kde_bwidth)

        # Testing the maximum random eigenvalue and the optimal variance
        self.assertAlmostEqual(maximum_eigen, 2.41011, delta=1e-5)
        self.assertAlmostEqual(var, 0.82702, delta=1e-5)
Example #16
    def test_mp_pdf(self):
        """
        Test deriving the pdf of the Marcenko-Pastur distribution.
        """

        risk_estimators = RiskEstimators()

        # Properties for the distribution
        var = 0.1
        tn_relation = 5
        num_points = 5

        # Calculating the pdf at 5 points
        pdf_mp = risk_estimators._mp_pdf(var, tn_relation, num_points)

        # Testing the minimum and maximum eigenvalues of the pdf support
        self.assertAlmostEqual(pdf_mp.index[0], 0.03056, delta=1e-4)
        self.assertAlmostEqual(pdf_mp.index[4], 0.20944, delta=1e-4)

        # Testing that the distribution curve is right
        self.assertTrue(pdf_mp.values[1] > pdf_mp.values[2] > pdf_mp.values[3])
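The expected support endpoints (0.03056 and 0.20944) follow from the standard Marcenko-Pastur parameterisation, where tn_relation plays the role of q = T/N. A short sketch under that assumption:

import numpy as np

var, tn_relation, num_points = 0.1, 5, 5

# Support bounds of the Marcenko-Pastur distribution
lambda_min = var * (1 - (1 / tn_relation) ** 0.5) ** 2  # ~0.03056
lambda_max = var * (1 + (1 / tn_relation) ** 0.5) ** 2  # ~0.20944

# The pdf on the support (zero at both endpoints)
lam = np.linspace(lambda_min, lambda_max, num_points)
pdf = tn_relation / (2 * np.pi * var * lam) * np.sqrt((lambda_max - lam) * (lam - lambda_min))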
Example #17
    @staticmethod
    def test_corr_to_cov():
        """
        Test the recovering of the covariance matrix from the correlation matrix.
        """

        risk_estimators = RiskEstimators()

        # Correlation matrix and the vector of standard deviations
        corr_matrix = np.array([[1, 0.1, -0.1], [0.1, 1, -0.3],
                                [-0.1, -0.3, 1]])
        std_vec = np.array([0.1, 0.2, 0.1])

        # Expected covariance matrix
        expected_matrix = np.array([[0.01, 0.002, -0.001],
                                    [0.002, 0.04, -0.006],
                                    [-0.001, -0.006, 0.01]])

        # Finding the covariance matrix
        cov_matrix = risk_estimators.corr_to_cov(corr_matrix, std_vec)

        # Testing the resulting covariance matrix
        np.testing.assert_almost_equal(cov_matrix, expected_matrix, decimal=5)
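This is the inverse of cov_to_corr from Example #14: each correlation entry is scaled by the product of the corresponding standard deviations, cov_ij = corr_ij * std_i * std_j. As a one-line numpy sketch:

cov_manual = corr_matrix * np.outer(std_vec, std_vec)  # matches expected_matrix above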
Example #18
    def test_hrp_with_input_as_distance_matrix(self):
        """
        Test HRP when passing a distance matrix as input.
        """

        hrp = HierarchicalRiskParity()
        returns = ReturnsEstimators().calculate_returns(asset_prices=self.data)
        covariance = returns.cov()
        corr = RiskEstimators.cov_to_corr(covariance)
        corr = pd.DataFrame(corr,
                            index=covariance.columns,
                            columns=covariance.columns)
        distance_matrix = np.sqrt((1 - corr).round(5) / 2)
        hrp.allocate(asset_names=self.data.columns,
                     covariance_matrix=covariance,
                     distance_matrix=distance_matrix)
        weights = hrp.weights.values[0]
        self.assertTrue((weights >= 0).all())
        self.assertTrue(len(weights) == self.data.shape[1])
        self.assertAlmostEqual(np.sum(weights), 1)
Example #19
    def test_filter_corr_hierarchical_warnings(self):
        """
        Test warnings while filtering the empirical correlation matrix.
        """

        risk_estimators = RiskEstimators()

        # A valid correlation matrix and several invalid inputs to test
        corr = np.array([[1, 0.70573243], [0.70573243, 1]])
        bad_dimension = np.array([1, 0])
        bad_size = np.array([[1, 0, 1], [0, 1, 1]])
        non_positive = np.array([[1, -1], [0, 1]])
        non_sym = np.array([[0, 0], [0, 0]])

        # Lists to test the expected outputs
        bad_inputs = [bad_dimension, bad_size, non_positive, non_sym, corr]
        result = []

        # Testing for warnings
        with self.assertWarns(UserWarning):  # Warning for bad dimension
            result.append(
                risk_estimators.filter_corr_hierarchical(bad_dimension))

        with self.assertWarns(UserWarning):  # Warning for bad size
            result.append(risk_estimators.filter_corr_hierarchical(bad_size))

        with self.assertWarns(UserWarning):  # Warning for non-positive matrix
            result.append(
                risk_estimators.filter_corr_hierarchical(non_positive))

        with self.assertWarns(UserWarning):  # Warning for non-symmetrical matrix
            result.append(risk_estimators.filter_corr_hierarchical(non_sym))

        with self.assertWarns(UserWarning):  # Warning for invalid method parameter
            result.append(
                risk_estimators.filter_corr_hierarchical(corr, method='bad'))

        # Testing that each failed call returns the unfiltered input array
        for idx, res in enumerate(result):
            np.testing.assert_almost_equal(res, bad_inputs[idx], decimal=4)
Example #20
    def __init__(self,
                 weight_bounds=(0, 1),
                 calculate_expected_returns="mean"):
        """
        Initialise the storage arrays and some preprocessing.

        :param weight_bounds: (tuple) A tuple specifying the lower and upper bound ranges for the portfolio weights.
        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports ``mean`` and ``exponential``.
        """

        self.weight_bounds = weight_bounds
        self.calculate_expected_returns = calculate_expected_returns
        self.weights = list()
        self.lambdas = list()
        self.gammas = list()
        self.free_weights = list()
        self.max_sharpe = None
        self.min_var = None
        self.efficient_frontier_means = None
        self.efficient_frontier_sigma = None
        self.returns_estimator = ReturnsEstimators()
        self.risk_estimators = RiskEstimators()
Example #21
from os.path import join

import pandas as pd

from definitions import ROOT_DIR
# ReturnsEstimators and RiskEstimators are assumed to be imported from the
# portfolio optimisation library used throughout these examples.

# Getting the data
stock_prices = pd.read_csv(join(ROOT_DIR, 'data/stock_prices.csv'),
                           parse_dates=True,
                           index_col='Date')
stock_prices = stock_prices.dropna(axis=1)
stock_prices.head()

# Leaving only the chosen stocks in the dataset (this full slice keeps all of them)
stock_prices = stock_prices.iloc[:, :]
stock_prices.head()

# estimators
returns_estimation = ReturnsEstimators()
risk_estimators = RiskEstimators()

# stock returns
stock_returns = returns_estimation.calculate_returns(stock_prices)

# the simple covariance matrix
cov_matrix = stock_returns.cov()

# the De-toned Covariance matrix (de-noising plus removal of the market component)
tn_relation = stock_prices.shape[0] / stock_prices.shape[1]
kde_bwidth = 0.25

cov_matrix_detoned = risk_estimators.denoise_covariance(cov_matrix,
                                                        tn_relation,
                                                        kde_bwidth=kde_bwidth,
                                                        detone=True)
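For comparison, the purely de-noised matrix (without removing the market component) comes from the same call with the default detone=False; a sketch under the same assumption about the signature seen in Example #22:

cov_matrix_denoised = risk_estimators.denoise_covariance(cov_matrix,
                                                         tn_relation,
                                                         kde_bwidth=kde_bwidth)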
Example #22
    @staticmethod
    def test_denoise_covariance():
        """
        Test the shrinkage of the eigenvalues associated with noise.
        """

        risk_estimators = RiskEstimators()

        # Covariance matrix to de-noise and parameters for the theoretical distribution.
        cov_matrix = np.array([[0.01, 0.002, -0.001], [0.002, 0.04, -0.006],
                               [-0.001, -0.006, 0.01]])

        tn_relation = 50
        kde_bwidth = 0.25
        alpha = 0.2
        denoise_method = 'const_resid_eigen'
        denoise_method_alt = 'target_shrink'
        denoise_method_alt_2 = 'spectral'
        detone = False
        detone_alt = True
        market_component = 1

        # Expected de-noised covariance matrix
        expected_cov = np.array([[0.01, 0.00267029, -0.00133514],
                                 [0.00267029, 0.04, -0.00438387],
                                 [-0.00133514, -0.00438387, 0.01]])

        expected_cov_alt = np.array([[0.01, 0.0057, -0.0028],
                                     [0.0057, 0.04, -0.0106],
                                     [-0.0028, -0.0106, 0.01]])

        expected_cov_alt_2 = np.array([[0.01, 0.02, -0.01],
                                       [0.02, 0.04, -0.02],
                                       [-0.01, -0.02, 0.01]])

        expected_cov_detoned = np.array([[0.01, -0.0094, 0.0047],
                                         [-0.0094, 0.04, 0.0111],
                                         [0.0047, 0.0111, 0.01]])

        # Finding the de-noised covariance matrix
        cov_matrix_denoised = risk_estimators.denoise_covariance(
            cov_matrix, tn_relation, denoise_method, detone, market_component,
            kde_bwidth)

        # Using the alternative de-noising method
        cov_matrix_denoised_alt = risk_estimators.denoise_covariance(
            cov_matrix, tn_relation, denoise_method_alt, detone,
            market_component, kde_bwidth, alpha)

        # Using the second alternative of the de-noising method
        cov_matrix_denoised_alt_2 = risk_estimators.denoise_covariance(
            cov_matrix, tn_relation, denoise_method_alt_2, detone,
            market_component, kde_bwidth)

        # Finding the de-toned covariance matrix
        cov_matrix_detoned = risk_estimators.denoise_covariance(
            cov_matrix, tn_relation, denoise_method, detone_alt,
            market_component, kde_bwidth)

        # Testing if the de-noised covariance matrix is right
        np.testing.assert_almost_equal(cov_matrix_denoised,
                                       expected_cov,
                                       decimal=4)

        # Testing if the de-noised covariance matrix from the alternative method is right
        np.testing.assert_almost_equal(cov_matrix_denoised_alt,
                                       expected_cov_alt,
                                       decimal=4)

        # Testing if the de-noised covariance matrix from the second alternative method is right
        np.testing.assert_almost_equal(cov_matrix_denoised_alt_2,
                                       expected_cov_alt_2,
                                       decimal=4)

        # Testing if the de-toned covariance matrix is right
        np.testing.assert_almost_equal(cov_matrix_detoned,
                                       expected_cov_detoned,
                                       decimal=4)
Example #23
class MeanVarianceOptimisation:
    # pylint: disable=too-many-instance-attributes
    """
    This class implements some classic mean-variance optimisation techniques for calculating the efficient frontier solutions.
    With the help of quadratic optimisers, users can generate optimal portfolios for different objective functions. Currently
    solutions to the following portfolios can be generated:

        1. Inverse Variance
        2. Maximum Sharpe
        3. Minimum Volatility
        4. Efficient Risk
        5. Maximum Return - Minimum Volatility
        6. Efficient Return
        7. Maximum Diversification
        8. Maximum Decorrelation
        9. Custom Objective Function
    """
    def __init__(self, calculate_expected_returns='mean', risk_free_rate=0.03):
        """
        Constructor.

        :param calculate_expected_returns: (str) The method to use for calculation of expected returns.
                                                 Currently supports: ``mean``, ``exponential``.
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        """

        self.weights = list()
        self.asset_names = None
        self.num_assets = None
        self.portfolio_risk = None
        self.portfolio_return = None
        self.portfolio_sharpe_ratio = None
        self.calculate_expected_returns = calculate_expected_returns
        self.returns_estimator = ReturnsEstimators()
        self.risk_estimators = RiskEstimators()
        self.weight_bounds = (0, 1)
        self.risk_free_rate = risk_free_rate

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 expected_asset_returns=None,
                 covariance_matrix=None,
                 solution='inverse_variance',
                 target_return=0.2,
                 target_risk=0.01,
                 risk_aversion=10,
                 weight_bounds=None):
        # pylint: disable=invalid-name, too-many-branches
        """
        Calculate the portfolio asset allocations using the method specified.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Supported solution strings - ``inverse_variance``, ``min_volatility``, ``max_sharpe``,
                               ``efficient_risk``, ``max_return_min_volatility``, ``max_diversification``, ``efficient_return``
                               and ``max_decorrelation``.
        :param target_return: (float) Target return of the portfolio.
        :param target_risk: (float) Target risk of the portfolio.
        :param risk_aversion: (float) Quantifies the risk averse nature of the investor - a higher value means
                                      more risk averse and vice-versa.
        :param weight_bounds: (tuple/list) Can be either a single tuple of upper and lower bounds
                                           for all portfolio weights or a list of strings with each string representing
                                           an inequality on the weights. For e.g. to bound the weight of the 3rd asset
                                           pass the following weight bounds: ['weights[2] <= 0.3', 'weights[2] >= 0.1'].
        """

        self._error_checks(asset_names, asset_prices, expected_asset_returns,
                           covariance_matrix, solution)

        # Weight bounds
        if weight_bounds is not None:
            self.weight_bounds = weight_bounds

        # Calculate the expected asset returns and covariance matrix if not given by the user
        expected_asset_returns, covariance = self._calculate_estimators(
            asset_prices, expected_asset_returns, covariance_matrix)

        if solution == 'inverse_variance':
            self._inverse_variance(covariance=covariance,
                                   expected_returns=expected_asset_returns)
        elif solution == 'min_volatility':
            self._min_volatility(covariance=covariance,
                                 expected_returns=expected_asset_returns)
        elif solution == 'max_return_min_volatility':
            self._max_return_min_volatility(
                covariance=covariance,
                expected_returns=expected_asset_returns,
                risk_aversion=risk_aversion)
        elif solution == 'max_sharpe':
            self._max_sharpe(covariance=covariance,
                             expected_returns=expected_asset_returns)
        elif solution == 'efficient_risk':
            self._min_volatility_for_target_return(
                covariance=covariance,
                expected_returns=expected_asset_returns,
                target_return=target_return)
        elif solution == 'efficient_return':
            self._max_return_for_target_risk(
                covariance=covariance,
                expected_returns=expected_asset_returns,
                target_risk=target_risk)
        elif solution == 'max_diversification':
            self._max_diversification(covariance=covariance,
                                      expected_returns=expected_asset_returns)
        else:
            self._max_decorrelation(covariance=covariance,
                                    expected_returns=expected_asset_returns)

        # Calculate the portfolio sharpe ratio
        self.portfolio_sharpe_ratio = (
            (self.portfolio_return - self.risk_free_rate) /
            (self.portfolio_risk**0.5))

        # Do some post-processing of the weights
        self._post_process_weights()

    def allocate_custom_objective(self,
                                  non_cvxpy_variables,
                                  cvxpy_variables,
                                  objective_function,
                                  constraints=None):
        # pylint: disable=eval-used, exec-used
        """
        Create a portfolio using custom objective and constraints.

        :param non_cvxpy_variables: (dict) A dictionary of variables to be used for providing the required input matrices and
                                           other inputs required by the user. The key of dictionary will be the variable name
                                           while the value can be anything ranging from a numpy matrix, list, dataframe or number.
        :param cvxpy_variables: (list) This is a list of cvxpy specific variables that will be initialised in the format required
                                       by cvxpy. For e.g. ["risk = cp.quad_form(weights, covariance)"] where you are initialising
                                       a variable named "risk" using cvxpy. Note that cvxpy is being imported as "cp", so be sure
                                       to refer to cvxpy as cp.
        :param objective_function: (str) A custom objective function. You need to write it in the form
                                         expected by cvxpy. The objective will be a single string, e.g. 'cp.Maximize(
                                         expected_asset_returns)'.
        :param constraints: (list) A list of strings containing the optimisation constraints. For e.g. ['weights >= 0', 'weights <= 1'].
        """

        # Initialise the non-cvxpy variables
        locals_ptr = locals()
        for variable_name, variable_value in non_cvxpy_variables.items():
            exec(variable_name + " = None")
            locals_ptr[variable_name] = variable_value

        self.num_assets = locals_ptr['num_assets']
        self.asset_names = list(map(str, range(self.num_assets)))
        if 'asset_names' in locals_ptr:
            self.asset_names = locals_ptr['asset_names']

        # Optimisation weights
        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)

        # Initialise cvxpy specific variables
        for variable in cvxpy_variables:
            exec(variable)

        # Optimisation objective and constraints
        allocation_objective = eval(objective_function)
        allocation_constraints = []
        for constraint in constraints:
            allocation_constraints.append(eval(constraint))

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')
        self.weights = weights.value

        # Calculate portfolio metrics
        if 'risk' in locals_ptr:
            self.portfolio_risk = locals_ptr['risk'].value
        if 'portfolio_return' in locals_ptr:
            self.portfolio_return = locals_ptr['portfolio_return'].value

        # Do some post-processing of the weights
        self._post_process_weights()

    def get_portfolio_metrics(self):
        """
        Prints the portfolio metrics - return, risk and Sharpe Ratio.
        """

        print("Portfolio Return = %s" % self.portfolio_return)
        print("Portfolio Risk = %s" % self.portfolio_risk)
        print("Portfolio Sharpe Ratio = %s" % self.portfolio_sharpe_ratio)

    def plot_efficient_frontier(self,
                                covariance,
                                expected_asset_returns,
                                min_return=0,
                                max_return=0.4,
                                risk_free_rate=0.05):
        # pylint: disable=broad-except
        """
        Plot the Markowitz efficient frontier.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param min_return: (float) Minimum target return.
        :param max_return: (float) Maximum target return.
        :param risk_free_rate: (float) The rate of return for a risk-free asset.
        """

        expected_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))
        volatilities = []
        returns = []
        sharpe_ratios = []
        for portfolio_return in np.linspace(min_return, max_return, 100):
            try:
                self.allocate(covariance_matrix=covariance,
                              expected_asset_returns=expected_returns,
                              solution='efficient_risk',
                              target_return=portfolio_return)
                volatilities.append(self.portfolio_risk**0.5)
                returns.append(portfolio_return)
                sharpe_ratios.append((portfolio_return - risk_free_rate) /
                                     (self.portfolio_risk**0.5 + 1e-16))
            except Exception:
                continue
        max_sharpe_ratio_index = sharpe_ratios.index(max(sharpe_ratios))
        min_volatility_index = volatilities.index(min(volatilities))
        figure = plt.scatter(volatilities,
                             returns,
                             c=sharpe_ratios,
                             cmap='viridis')
        plt.colorbar(label='Sharpe Ratio')
        plt.scatter(volatilities[max_sharpe_ratio_index],
                    returns[max_sharpe_ratio_index],
                    marker='*',
                    color='g',
                    s=400,
                    label='Maximum Sharpe Ratio')
        plt.scatter(volatilities[min_volatility_index],
                    returns[min_volatility_index],
                    marker='*',
                    color='r',
                    s=400,
                    label='Minimum Volatility')
        plt.xlabel('Volatility')
        plt.ylabel('Return')
        plt.legend(loc='upper left')
        return figure

    def _error_checks(self,
                      asset_names,
                      asset_prices,
                      expected_asset_returns,
                      covariance_matrix,
                      solution=None):
        """
        Some initial error checks on the inputs.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :param solution: (str) The type of solution/algorithm to use to calculate the weights.
                               Currently supported solution strings - inverse_variance, min_volatility, max_sharpe,
                               efficient_risk, max_return_min_volatility, max_diversification, efficient_return
                               and max_decorrelation.
        """

        if asset_prices is None and (expected_asset_returns is None
                                     or covariance_matrix is None):
            raise ValueError(
                "You need to supply either raw prices or expected returns "
                "and a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")

        if solution is not None and solution not in {
                "inverse_variance", "min_volatility", "max_sharpe",
                "efficient_risk", "max_return_min_volatility",
                "max_diversification", "efficient_return", "max_decorrelation"
        }:
            raise ValueError(
                "Unknown solution string specified. Supported solutions - "
                "inverse_variance, min_volatility, max_sharpe, efficient_risk"
                "max_return_min_volatility, max_diversification, efficient_return and max_decorrelation"
            )

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif covariance_matrix is not None and isinstance(
                    covariance_matrix, pd.DataFrame):
                asset_names = covariance_matrix.columns
            else:
                raise ValueError("Please provide a list of asset names")
        self.asset_names = asset_names
        self.num_assets = len(asset_names)

    def _calculate_estimators(self, asset_prices, expected_asset_returns,
                              covariance_matrix):
        """
        Calculate the expected returns and covariance matrix of assets in the portfolio.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close).
        :param expected_asset_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns (sigma).
        :return: (np.array, pd.DataFrame) Expected asset returns and covariance matrix.
        """

        # Calculate the expected returns if the user does not supply any returns
        if expected_asset_returns is None:
            if self.calculate_expected_returns == "mean":
                expected_asset_returns = self.returns_estimator.calculate_mean_historical_returns(
                    asset_prices=asset_prices)
            elif self.calculate_expected_returns == "exponential":
                expected_asset_returns = self.returns_estimator.calculate_exponential_historical_returns(
                    asset_prices=asset_prices)
            else:
                raise ValueError(
                    "Unknown returns specified. Supported returns - mean, exponential"
                )
        expected_asset_returns = np.array(expected_asset_returns).reshape(
            (len(expected_asset_returns), 1))

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices)
            covariance_matrix = returns.cov()
        cov = pd.DataFrame(covariance_matrix,
                           index=self.asset_names,
                           columns=self.asset_names)

        return expected_asset_returns, cov

    def _post_process_weights(self):
        """
        Check weights for very small numbers and numbers close to 1. A final post-processing of weights produced by the
        optimisation procedures.
        """

        # Round weights which are very small negative numbers (e.g. -4.7e-16) to 0
        self.weights[self.weights < 0] = 0

        # If any of the weights is very close to one, we convert it to 1 and set the other asset weights to 0.
        if np.any(np.isclose(self.weights, 1)):
            almost_one_index = np.isclose(self.weights, 1)
            self.weights[almost_one_index] = 1
            self.weights[np.logical_not(almost_one_index)] = 0

        self.weights = pd.DataFrame(self.weights)
        self.weights.index = self.asset_names
        self.weights = self.weights.T

    def _inverse_variance(self, covariance, expected_returns):
        """
        Calculate weights using inverse-variance allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        ivp = 1. / np.diag(covariance)
        ivp /= ivp.sum()
        self.weights = ivp
        self.portfolio_risk = np.dot(self.weights,
                                     np.dot(covariance.values, self.weights.T))
        self.portfolio_return = np.dot(self.weights, expected_returns)[0]

    def _min_volatility(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Compute minimum volatility portfolio allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(0.5 * risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([weights <= 1, weights >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_return_min_volatility(self, covariance, expected_returns,
                                   risk_aversion):
        # pylint: disable=eval-used
        """
        Calculate maximum return-minimum volatility portfolio allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param risk_aversion: (float) Quantifies the risk-averse nature of the investor - a higher value means
                           more risk averse and vice-versa.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk_aversion * risk -
                                           portfolio_return)
        allocation_constraints = [cp.sum(weights) == 1]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([weights <= 1, weights >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _max_sharpe(self, covariance, expected_returns):
        # pylint: disable=invalid-name, eval-used
        """
        Compute maximum Sharpe portfolio allocation.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        y = cp.Variable(self.num_assets)
        y.value = np.array([1 / self.num_assets] * self.num_assets)
        kappa = cp.Variable(1)
        risk = cp.quad_form(y, covariance)
        weights = y / kappa
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum((expected_returns - self.risk_free_rate).T @ y) == 1,
            cp.sum(y) == kappa, kappa >= 0
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                y >= kappa * self.weight_bounds[0],
                y <= kappa * self.weight_bounds[1]
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([y <= kappa, y >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if y.value is None or kappa.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]

    def _min_volatility_for_target_return(self, covariance, expected_returns,
                                          target_return):
        # pylint: disable=eval-used
        """
        Calculate minimum volatility portfolio for a given target return.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_return: (float) Target return of the portfolio.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(risk)
        allocation_constraints = [
            cp.sum(weights) == 1,
            portfolio_return >= target_return,
        ]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([weights <= 1, weights >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = target_return

    def _max_return_for_target_risk(self, covariance, expected_returns,
                                    target_risk):
        # pylint: disable=eval-used
        """
        Calculate maximum return for a given target volatility/risk.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        :param target_risk: (float) Target risk of the portfolio.
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        portfolio_return = cp.matmul(weights, expected_returns)
        risk = cp.quad_form(weights, covariance)

        # Optimisation objective and constraints
        allocation_objective = cp.Maximize(portfolio_return)
        allocation_constraints = [cp.sum(weights) == 1, risk <= target_risk]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([weights <= 1, weights >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve()
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = target_risk
        self.portfolio_return = portfolio_return.value[0]

    def _max_diversification(self, covariance, expected_returns):
        """
        Calculate the maximum diversified portfolio.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        self._max_decorrelation(covariance, expected_returns)

        # Divide weights by the asset variances (the diagonal of the covariance matrix)
        self.weights /= np.diag(covariance)

        # Standardize weights
        self.weights /= np.sum(self.weights)

        portfolio_return = np.dot(expected_returns.T, self.weights)[0]
        risk = np.dot(self.weights, np.dot(covariance, self.weights.T))

        self.portfolio_risk = risk
        self.portfolio_return = portfolio_return

    def _max_decorrelation(self, covariance, expected_returns):
        # pylint: disable=eval-used
        """
        Calculate the maximum decorrelated portfolio.

        :param covariance: (pd.DataFrame) Covariance dataframe of asset returns.
        :param expected_returns: (list/np.array/pd.dataframe) A list of mean stock returns (mu).
        """

        weights = cp.Variable(self.num_assets)
        weights.value = np.array([1 / self.num_assets] * self.num_assets)
        risk = cp.quad_form(weights, covariance)
        portfolio_return = cp.matmul(weights, expected_returns)
        corr = self.risk_estimators.cov_to_corr(covariance)
        portfolio_correlation = cp.quad_form(weights, corr)

        # Optimisation objective and constraints
        allocation_objective = cp.Minimize(portfolio_correlation)
        allocation_constraints = [cp.sum(weights) == 1]
        if isinstance(self.weight_bounds, tuple):
            allocation_constraints.extend([
                weights >= self.weight_bounds[0],
                weights <= min(self.weight_bounds[1], 1)
            ])
        else:
            for inequality in self.weight_bounds:
                allocation_constraints.append(eval(inequality))

            # Add the hard-boundaries for weights.
            allocation_constraints.extend([weights <= 1, weights >= 0])

        # Define and solve the problem
        problem = cp.Problem(objective=allocation_objective,
                             constraints=allocation_constraints)
        problem.solve(warm_start=True)
        if weights.value is None:
            raise ValueError('No optimal set of weights found.')

        self.weights = weights.value
        self.portfolio_risk = risk.value
        self.portfolio_return = portfolio_return.value[0]
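
For reference, the convex programme solved by `_max_decorrelation` can be reproduced in isolation. Below is a minimal sketch assuming only numpy and cvxpy; the three-asset correlation matrix is made up for illustration.

import cvxpy as cp
import numpy as np

# Illustrative positive-definite correlation matrix for three assets
corr = np.array([[1.0, 0.6, 0.2],
                 [0.6, 1.0, 0.4],
                 [0.2, 0.4, 1.0]])
weights = cp.Variable(3)

# Minimise the portfolio correlation w' C w subject to full investment
# and long-only bounds, mirroring the constraints built above
problem = cp.Problem(cp.Minimize(cp.quad_form(weights, corr)),
                     [cp.sum(weights) == 1, weights >= 0, weights <= 1])
problem.solve()
print(weights.value)  # the maximum decorrelation allocation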
Exemple #24
0
class HierarchicalEqualRiskContribution:
    """
    This class implements the Hierarchical Equal Risk Contribution (HERC) algorithm and its extended components described in the
    following papers: `Raffinot, Thomas, The Hierarchical Equal Risk Contribution Portfolio (August 23,
    2018). <https://ssrn.com/abstract=3237540>`_ and `Raffinot, Thomas, Hierarchical Clustering Based Asset Allocation (May 2017).
    <https://ssrn.com/abstract=2840729>`_

    While the vanilla Hierarchical Risk Parity algorithm uses only the variance as a risk measure for assigning weights, the HERC
    algorithm proposed by Raffinot allows investors to use other risk metrics like Standard Deviation, Expected Shortfall and
    Conditional Drawdown at Risk.
    """

    UniqueColors = ['darkred', 'deepskyblue', 'springgreen', 'darkorange', 'deeppink', 'slateblue', 'navy', 'blueviolet',
                    'pink', 'darkslategray']
    UnclusteredColor = "#808080"

    def __init__(self, confidence_level=0.05):
        """
        Initialise.

        :param confidence_level: (float) The confidence level (alpha) used for calculating expected shortfall and conditional
                                         drawdown at risk.
        """

        self.weights = list()
        self.clusters = None
        self.ordered_indices = None
        self.cluster_children = None
        self.optimal_num_clusters = None
        self.returns_estimator = ReturnsEstimators()
        self.risk_estimator = RiskEstimators()
        self.risk_metrics = RiskMetrics()
        self.confidence_level = confidence_level

    def allocate(self, asset_names=None, asset_prices=None, asset_returns=None, covariance_matrix=None,
                 risk_measure='equal_weighting', linkage='ward', optimal_num_clusters=None):
        # pylint: disable=too-many-branches
        """
        Calculate asset allocations using the Hierarchical Equal Risk Contribution algorithm.

        :param asset_names: (list) A list of strings containing the asset names.
        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns.
        :param risk_measure: (str) The metric used for calculating weight allocations. Supported strings - ``equal_weighting``,
                                   ``variance``, ``standard_deviation``, ``expected_shortfall``, ``conditional_drawdown_risk``.
        :param linkage: (str) The type of linkage method to use for clustering. Supported strings - ``single``, ``average``,
                              ``complete``, ``ward``.
        :param optimal_num_clusters: (int) Optimal number of clusters for hierarchical clustering.
        """

        # Perform error checks
        self._error_checks(asset_prices, asset_returns, risk_measure, covariance_matrix)

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None and (risk_measure in {'expected_shortfall', 'conditional_drawdown_risk'} or covariance_matrix is
                                      None or not optimal_num_clusters):
            asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        cov = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation from covariance matrix
        corr = self.risk_estimator.cov_to_corr(cov)

        # Calculate the optimal number of clusters
        if not optimal_num_clusters:
            self.optimal_num_clusters = self._get_optimal_number_of_clusters(correlation=corr,
                                                                             linkage=linkage,
                                                                             asset_returns=asset_returns)
        else:
            self.optimal_num_clusters = self._check_max_number_of_clusters(num_clusters=optimal_num_clusters,
                                                                           linkage=linkage,
                                                                           correlation=corr)

        # Tree Clustering
        self.clusters, self.cluster_children = self._tree_clustering(correlation=corr,
                                                                     linkage=linkage)

        # Get the flattened order of assets in hierarchical clustering tree
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(num_assets, 2 * num_assets - 2)

        # Recursive Bisection
        self._recursive_bisection(asset_returns=asset_returns,
                                  covariance_matrix=cov,
                                  assets=asset_names,
                                  risk_measure=risk_measure)

    def plot_clusters(self, assets):
        """
        Plot a dendrogram of the hierarchical clusters.

        :param assets: (list) Asset names in the portfolio.
        :return: (dict) Dendrogram.
        """

        colors = dict()
        for cluster_idx, children in self.cluster_children.items():
            color = self.UniqueColors[cluster_idx]

            for child in children:
                colors[assets[child]] = color
        dendrogram_plot = dendrogram(self.clusters, labels=assets, link_color_func=lambda k: self.UnclusteredColor)
        plot_axis = plt.gca()
        xlbls = plot_axis.get_xmajorticklabels()
        for lbl in xlbls:
            lbl.set_color(colors[lbl.get_text()])
        return dendrogram_plot

    @staticmethod
    def _compute_cluster_inertia(labels, asset_returns):
        """
        Calculate the cluster inertia (within cluster sum-of-squares).

        :param labels: (list) Cluster labels.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :return: (float) Cluster inertia value.
        """

        unique_labels = np.unique(labels)
        inertia = [np.mean(pairwise_distances(asset_returns[:, labels == label])) for label in unique_labels]
        inertia = np.log(np.sum(inertia))
        return inertia

    @staticmethod
    def _check_max_number_of_clusters(num_clusters, linkage, correlation):
        """
        In some cases, the optimal number of clusters specified by the user is greater than the maximum number of clusters
        possible with the given data. This function checks for this and, when the given value exceeds the maximum possible
        number of clusters, assigns the proper value.

        :param num_clusters: (int) The number of clusters.
        :param linkage: (str) The type of linkage method to use for clustering.
        :param correlation: (np.array) Matrix of asset correlations.
        :return: (int) New value for number of clusters.
        """

        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        clusters = scipy_linkage(squareform(distance_matrix.values), method=linkage)
        clustering_inds = fcluster(clusters, num_clusters, criterion='maxclust')
        max_number_of_clusters_possible = max(clustering_inds)
        num_clusters = min(max_number_of_clusters_possible, num_clusters)
        return num_clusters

    def _get_optimal_number_of_clusters(self, correlation, asset_returns, linkage, num_reference_datasets=5):
        """
        Find the optimal number of clusters for hierarchical clustering using the Gap statistic.

        :param correlation: (np.array) Matrix of asset correlations.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param linkage: (str) The type of linkage method to use for clustering.
        :param num_reference_datasets: (int) The number of reference datasets to generate for calculating expected inertia.
        :return: (int) The optimal number of clusters.
        """

        original_distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        gap_values = []
        num_clusters = 1
        max_number_of_clusters = float("-inf")
        while True:

            # Calculate inertia from original data
            original_clusters = scipy_linkage(squareform(original_distance_matrix), method=linkage)
            original_cluster_assignments = fcluster(original_clusters, num_clusters, criterion='maxclust')
            if max(original_cluster_assignments) == max_number_of_clusters or max(original_cluster_assignments) > 10:
                break
            max_number_of_clusters = max(original_cluster_assignments)
            inertia = self._compute_cluster_inertia(original_cluster_assignments, asset_returns.values)

            # Calculate expected inertia from reference datasets
            expected_inertia = self._calculate_expected_inertia(num_reference_datasets, asset_returns, num_clusters, linkage)

            # Calculate the gap statistic
            gap = expected_inertia - inertia
            gap_values.append(gap)
            num_clusters += 1
        return 1 + np.argmax(gap_values)

    def _calculate_expected_inertia(self, num_reference_datasets, asset_returns, num_clusters, linkage):
        """
        Calculate the expected inertia by generating clusters from a uniform distribution.

        :param num_reference_datasets: (int) The number of reference datasets to generate from the distribution.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param num_clusters: (int) The number of clusters to generate.
        :param linkage: (str) The type of linkage criterion to use for hierarchical clustering.
        :return: (float) The expected inertia from the reference datasets.
        """

        reference_inertias = []
        for _ in range(num_reference_datasets):
            # Generate reference returns from uniform distribution and calculate the distance matrix.
            reference_asset_returns = pd.DataFrame(np.random.rand(*asset_returns.shape))
            reference_correlation = np.array(reference_asset_returns.corr())
            reference_distance_matrix = np.sqrt(2 * (1 - reference_correlation).round(5))

            reference_clusters = scipy_linkage(squareform(reference_distance_matrix), method=linkage)
            reference_cluster_assignments = fcluster(reference_clusters, num_clusters, criterion='maxclust')
            inertia = self._compute_cluster_inertia(reference_cluster_assignments, reference_asset_returns.values)
            reference_inertias.append(inertia)
        return np.mean(reference_inertias)

    def _tree_clustering(self, correlation, linkage):
        """
        Perform agglomerative clustering on the current portfolio.

        :param correlation: (np.array) Matrix of asset correlations.
        :param linkage: (str) The type of linkage method to use for clustering.
        :return: (list) Structure of hierarchical tree.
        """

        distance_matrix = np.sqrt(2 * (1 - correlation).round(5))
        clusters = scipy_linkage(squareform(distance_matrix.values), method=linkage)
        clustering_inds = fcluster(clusters, self.optimal_num_clusters, criterion='maxclust')
        cluster_children = {index - 1: [] for index in range(min(clustering_inds), max(clustering_inds) + 1)}
        for index, cluster_index in enumerate(clustering_inds):
            cluster_children[cluster_index - 1].append(index)
        return clusters, cluster_children

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) The total number of assets.
        :param curr_index: (int) Current index.
        :return: (list) The assets rearranged according to hierarchical clustering.
        """

        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) + self._quasi_diagnalization(num_assets, right))

    def _recursive_bisection(self, asset_returns, covariance_matrix, assets, risk_measure):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param assets: (list) List of asset names in the portfolio.
        :param risk_measure: (str) The metric used for calculating weight allocations.
        """

        num_assets = len(assets)
        self.weights = np.ones(shape=num_assets)
        clusters_contribution = np.ones(shape=self.optimal_num_clusters)
        clusters_weights = np.ones(shape=self.optimal_num_clusters)

        # Calculate the corresponding risk measure for the clusters
        self._calculate_risk_contribution_of_clusters(clusters_contribution,
                                                      risk_measure,
                                                      covariance_matrix,
                                                      asset_returns)

        # Recursive bisection taking into account the dendrogram structure
        for cluster_index in range(self.optimal_num_clusters - 1):

            # Get the left and right cluster ids
            left_cluster_ids, right_cluster_ids = self._get_children_cluster_ids(num_assets=num_assets,
                                                                                 parent_cluster_id=cluster_index)

            # Compute alpha
            left_cluster_contribution = np.sum(clusters_contribution[left_cluster_ids])
            right_cluster_contribution = np.sum(clusters_contribution[right_cluster_ids])
            if risk_measure == 'equal_weighting':
                alloc_factor = 0.5
            else:
                alloc_factor = 1 - left_cluster_contribution / (left_cluster_contribution + right_cluster_contribution)

            # Assign weights to each sub-cluster
            clusters_weights[left_cluster_ids] *= alloc_factor
            clusters_weights[right_cluster_ids] *= 1 - alloc_factor

        # Compute the final weights
        self._calculate_final_portfolio_weights(risk_measure,
                                                clusters_weights,
                                                covariance_matrix,
                                                asset_returns)

        # Assign actual asset names to weight index
        self.weights = pd.DataFrame(self.weights)
        self.weights.index = assets
        self.weights = self.weights.T
        self.weights = self.weights.iloc[:, self.ordered_indices]

    def _calculate_final_portfolio_weights(self, risk_measure, clusters_weights, covariance_matrix, asset_returns):
        """
        Calculate the final asset weights.

        :param risk_measure: (str) The metric used for calculating weight allocations.
        :param clusters_weights: (np.array) The cluster weights calculated using recursive bisection.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        """

        for cluster_index in range(self.optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]

            # Covariance of assets in this cluster
            cluster_covariance = covariance_matrix.iloc[cluster_asset_indices, cluster_asset_indices]

            # Historical returns of assets in this cluster
            cluster_asset_returns = None
            if not asset_returns.empty:
                cluster_asset_returns = asset_returns.iloc[:, cluster_asset_indices]

            parity_weights = self._calculate_naive_risk_parity(cluster_index=cluster_index,
                                                               risk_measure=risk_measure,
                                                               covariance=cluster_covariance,
                                                               asset_returns=cluster_asset_returns)
            self.weights[cluster_asset_indices] = parity_weights * clusters_weights[cluster_index]

    def _calculate_naive_risk_parity(self, cluster_index, risk_measure, covariance, asset_returns):
        # pylint: disable=no-else-return
        """
        Calculate the naive risk parity weights.

        :param cluster_index: (int) Index of the current cluster.
        :param risk_measure: (str) The metric used for calculating weight allocations.
        :param covariance: (pd.DataFrame) The covariance matrix of asset returns.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :return: (np.array) list of risk parity weights for assets in current cluster.
        """

        if risk_measure == 'equal_weighting':
            num_assets_in_cluster = len(self.cluster_children[cluster_index])
            return np.ones(num_assets_in_cluster) * 1/num_assets_in_cluster
        elif risk_measure in {'variance', 'standard_deviation'}:
            return self._get_inverse_variance_weights(covariance)
        elif risk_measure == 'expected_shortfall':
            return self._get_inverse_CVaR_weights(asset_returns)
        return self._get_inverse_CDaR_weights(asset_returns)

    def _calculate_risk_contribution_of_clusters(self, clusters_contribution, risk_measure,
                                                 covariance_matrix, asset_returns):
        """
        Calculate the risk contribution of clusters based on the allocation metric.

        :param clusters_contribution: (np.array) The risk contribution value of the clusters.
        :param risk_measure: (str) The metric used for calculating weight allocations.
        :param covariance_matrix: (pd.DataFrame) The covariance matrix.
        :param asset_returns: (pd.DataFrame) Historical asset returns.
        """

        for cluster_index in range(self.optimal_num_clusters):
            cluster_asset_indices = self.cluster_children[cluster_index]

            if risk_measure == 'variance':
                clusters_contribution[cluster_index] = self._get_cluster_variance(covariance_matrix,
                                                                                  cluster_asset_indices)
            elif risk_measure == 'standard_deviation':
                clusters_contribution[cluster_index] = np.sqrt(
                    self._get_cluster_variance(covariance_matrix, cluster_asset_indices))
            elif risk_measure == 'expected_shortfall':
                clusters_contribution[cluster_index] = self._get_cluster_expected_shortfall(asset_returns,
                                                                                            cluster_asset_indices)
            elif risk_measure == 'conditional_drawdown_risk':
                clusters_contribution[cluster_index] = self._get_cluster_conditional_drawdown_at_risk(
                    asset_returns=asset_returns,
                    cluster_indices=cluster_asset_indices)

    def _get_children_cluster_ids(self, num_assets, parent_cluster_id):
        """
        Find the left and right children cluster id of the given parent cluster id.

        :param num_assets: (int) The number of assets in the portfolio.
        :param parent_cluster_id: (int) The current parent cluster id.
        :return: (list, list) List of cluster ids to the left and right of the parent cluster in the hierarchical tree.
        """

        left = int(self.clusters[num_assets - 2 - parent_cluster_id, 0])
        right = int(self.clusters[num_assets - 2 - parent_cluster_id, 1])
        left_cluster = self._quasi_diagnalization(num_assets, left)
        right_cluster = self._quasi_diagnalization(num_assets, right)

        left_cluster_ids = []
        right_cluster_ids = []
        for id_cluster, cluster in self.cluster_children.items():
            if sorted(self._intersection(left_cluster, cluster)) == sorted(cluster):
                left_cluster_ids.append(id_cluster)
            if sorted(self._intersection(right_cluster, cluster)) == sorted(cluster):
                right_cluster_ids.append(id_cluster)

        return left_cluster_ids, right_cluster_ids

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate inverse variance weight allocations.

        :param covariance: (pd.DataFrame) Covariance matrix of assets.
        :return: (np.array) Inverse variance weight values.
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_weights = inv_diag * (1 / np.sum(inv_diag))
        return parity_weights

    def _get_inverse_CVaR_weights(self, asset_returns):
        # pylint: disable=invalid-name
        """
        Calculate inverse CVaR weight allocations.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :return: (np.array) Inverse CVaR weight values.
        """

        parity_weights = []
        for asset_index in range(asset_returns.shape[1]):
            returns = asset_returns.iloc[:, asset_index]
            cvar = self.risk_metrics.calculate_expected_shortfall(returns=returns,
                                                                  confidence_level=self.confidence_level)
            parity_weights.append(cvar)

        parity_weights = np.array(parity_weights)
        parity_weights = 1 / parity_weights
        parity_weights = parity_weights * (1 / np.sum(parity_weights))
        return parity_weights

    def _get_inverse_CDaR_weights(self, asset_returns):
        # pylint: disable=invalid-name
        """
        Calculate inverse CDaR weight allocations.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :return: (np.array) Inverse CDaR weight values.
        """

        parity_weights = []
        for asset_index in range(asset_returns.shape[1]):
            returns = asset_returns.iloc[:, asset_index]
            cdar = self.risk_metrics.calculate_conditional_drawdown_risk(returns=returns,
                                                                         confidence_level=self.confidence_level)
            parity_weights.append(cdar)

        parity_weights = np.array(parity_weights)
        parity_weights = 1 / parity_weights
        parity_weights = parity_weights * (1 / np.sum(parity_weights))
        return parity_weights

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.DataFrame) Covariance matrix of asset returns.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Variance of the cluster.
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_weights = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_weights)
        return cluster_variance

    def _get_cluster_expected_shortfall(self, asset_returns, cluster_indices):
        """
        Calculate cluster expected shortfall.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) Expected shortfall of the cluster.
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        parity_weights = self._get_inverse_CVaR_weights(cluster_asset_returns)
        portfolio_returns = cluster_asset_returns @ parity_weights
        cluster_expected_shortfall = self.risk_metrics.calculate_expected_shortfall(returns=portfolio_returns,
                                                                                    confidence_level=self.confidence_level)
        return cluster_expected_shortfall

    def _get_cluster_conditional_drawdown_at_risk(self, asset_returns, cluster_indices):
        """
        Calculate cluster conditional drawdown at risk.

        :param asset_returns: (pd.DataFrame) Historical asset returns.
        :param cluster_indices: (list) List of asset indices for the cluster.
        :return: (float) CDD of the cluster.
        """

        cluster_asset_returns = asset_returns.iloc[:, cluster_indices]
        parity_weights = self._get_inverse_CDaR_weights(cluster_asset_returns)
        portfolio_returns = cluster_asset_returns @ parity_weights
        cluster_conditional_drawdown = self.risk_metrics.calculate_conditional_drawdown_risk(returns=portfolio_returns,
                                                                                             confidence_level=self.confidence_level)
        return cluster_conditional_drawdown

    @staticmethod
    def _intersection(list1, list2):
        """
        Calculate the intersection of two lists.

        :param list1: (list) The first list of items.
        :param list2: (list) The second list of items.
        :return: (list) List containing the intersection of the input lists.
        """

        return list(set(list1) & set(list2))

    @staticmethod
    def _error_checks(asset_prices, asset_returns, risk_measure, covariance_matrix):
        """
        Perform initial warning checks.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param risk_measure: (str) The metric used for calculating weight allocations.
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns.
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError("You need to supply either raw prices or returns or covariance matrix")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if risk_measure not in \
                {'variance', 'standard_deviation', 'equal_weighting', 'expected_shortfall',
                 'conditional_drawdown_risk'}:
            raise ValueError("Unknown allocation metric specified. Supported metrics are - variance, "
                             "standard_deviation, equal_weighting, expected_shortfall, "
                             "conditional_drawdown_risk")
Exemple #26
0
class NestedClusteredOptimisation:
    """
    This class implements the Nested Clustered Optimization (NCO) algorithm, the Convex Optimization Solution (CVO),
    the Monte Carlo Optimization Selection (MCOS) algorithm and a sample data generating function. It is reproduced with
    modification from the following paper: `Marcos Lopez de Prado “A Robust Estimator of the Efficient Frontier”,
    (2019). <https://papers.ssrn.com/abstract_id=3469961>`_.
    """
    def __init__(self):
        """
        Initialize
        """

        self.risk_estimators = RiskEstimators()

    def allocate_cvo(self, cov, mu_vec=None):
        """
        Estimates the Convex Optimization Solution (CVO).

        Uses the covariance matrix and the mu (optimal solution) vector.
        If mu is the vector of expected values from variables, the result will be
        a vector of weights with maximum Sharpe ratio.
        If mu is a vector of ones, the result will be a vector of weights with
        minimum variance.

        :param cov: (np.array) Covariance matrix of the variables.
        :param mu_vec: (np.array) Expected value of draws from the variables for maximum Sharpe ratio.
                              None if outputting the minimum variance portfolio.
        :return: (np.array) Weights for optimal allocation.
        """

        # Calculating the inverse covariance matrix. If the matrix is singular i.e. not invertible,
        # then calculate a pseudo-inverse.
        if self.risk_estimators.is_matrix_invertible(cov):
            inv_cov = np.linalg.inv(cov)
        else:
            inv_cov = np.linalg.pinv(cov)

        # Generating a vector of ones matching the size of the inverted covariance matrix
        ones = np.ones(shape=(inv_cov.shape[0], 1))

        if mu_vec is None:  # To output the minimum variance portfolio
            mu_vec = ones

        # Calculating the analytical solution using CVO - weights
        w_cvo = np.dot(inv_cov, mu_vec)
        w_cvo /= np.dot(mu_vec.T, w_cvo)

        return w_cvo

    def allocate_nco(self, cov, mu_vec=None, max_num_clusters=None, n_init=10):
        """
        Estimates the optimal allocation using the nested clustered optimization (NCO) algorithm.

        First, it clusters the covariance matrix into subsets of highly correlated variables.
        Second, it computes the optimal allocation for each of the clusters separately.
        This allows collapsing of the original covariance matrix into a reduced covariance matrix,
        where each cluster is represented by a single variable.
        Third, it computes the optimal allocations across the reduced covariance matrix.
        Fourth, the final allocations are the dot-product of the intra-cluster (step 2) allocations and
        the inter-cluster (step 3) allocations.

        For the Convex Optimization Solution (CVO), a mu - optimal solution parameter is needed.
        If mu is the vector of expected values from variables, the result will be
        a vector of weights with maximum Sharpe ratio.
        If mu is a vector of ones (pass None value), the result will be a vector of weights with
        minimum variance.

        :param cov: (np.array) Covariance matrix of the variables.
        :param mu_vec: (np.array) Expected value of draws from the variables for maximum Sharpe ratio.
                              None if outputting the minimum variance portfolio.
        :param max_num_clusters: (int) Allowed maximum number of clusters. If None then taken as num_elements/2.
        :param n_init: (int) Number of times the k-means algorithm will run with different centroid seeds (default 10)
        :return: (np.array) Optimal allocation using the NCO algorithm.
        """

        # Using pd.DataFrame instead of np.array
        cov = pd.DataFrame(cov)

        # Optimal solution for minimum variance
        if mu_vec is not None:
            mu_vec = pd.Series(mu_vec[:, 0])

        # Calculating correlation matrix
        corr = self.risk_estimators.cov_to_corr(cov)

        # Optimal partition of clusters (step 1)
        corr, clusters, _ = self._cluster_kmeans_base(corr,
                                                      max_num_clusters,
                                                      n_init=n_init)

        # Weights inside clusters
        w_intra_clusters = pd.DataFrame(0,
                                        index=cov.index,
                                        columns=clusters.keys())

        # Iterating over clusters
        for i in clusters:
            # Covariance matrix of elements in cluster
            cov_cluster = cov.loc[clusters[i], clusters[i]].values

            # Optimal solution vector for the cluster
            mu_cluster = (None if mu_vec is None else
                          mu_vec.loc[clusters[i]].values.reshape(-1, 1))

            # Estimating the Convex Optimization Solution in a cluster (step 2)
            w_intra_clusters.loc[clusters[i], i] = self.allocate_cvo(
                cov_cluster, mu_cluster).flatten()

        # Reducing new covariance matrix to calculate inter-cluster weights
        cov_inter_cluster = w_intra_clusters.T.dot(
            np.dot(cov, w_intra_clusters))
        mu_inter_cluster = (None if mu_vec is None else
                            w_intra_clusters.T.dot(mu_vec))

        # Optimal allocations across the reduced covariance matrix (step 3)
        w_inter_clusters = pd.Series(self.allocate_cvo(
            cov_inter_cluster, mu_inter_cluster).flatten(),
                                     index=cov_inter_cluster.index)

        # Final allocations - dot-product of the intra-cluster and inter-cluster allocations (step 4)
        w_nco = w_intra_clusters.mul(w_inter_clusters,
                                     axis=1).sum(axis=1).values.reshape(-1, 1)

        return w_nco

    def allocate_mcos(self,
                      mu_vec,
                      cov,
                      num_obs,
                      num_sims=100,
                      kde_bwidth=0.01,
                      min_var_portf=True,
                      lw_shrinkage=False):
        """
        Estimates the optimal allocation using the Monte Carlo optimization selection (MCOS) algorithm.

        Repeats the CVO and the NCO algorithms multiple times on the empirical values to get a dataframe of trials
        in order to later compare them to a true optimal weights allocation and compare the robustness of the NCO
        and CVO methods.

        :param mu_vec: (np.array) The original vector of expected outcomes.
        :param cov: (np.array) The original covariance matrix of outcomes.
        :param num_obs: (int) The number of observations T used to compute mu_vec and cov.
        :param num_sims: (int) The number of Monte Carlo simulations to run. (100 by default)
        :param kde_bwidth: (float) The bandwidth of the KDE used to de-noise the covariance matrix. (0.01 by default)
        :param min_var_portf: (bool) When True, the minimum variance solution is computed. Otherwise, the
                                     maximum Sharpe ratio solution is computed. (True by default)
        :param lw_shrinkage: (bool) When True, the covariance matrix is subjected to the Ledoit-Wolf shrinkage
                                    procedure. (False by default)
        :return: (pd.DataFrame, pd.DataFrame) DataFrames with allocations for CVO and NCO algorithms.
        """

        # Creating DataFrames for CVO results and NCO results
        w_cvo = pd.DataFrame(columns=range(cov.shape[0]),
                             index=range(num_sims),
                             dtype=float)
        w_nco = w_cvo.copy(deep=True)

        # Iterating through simulations
        for simulation in range(num_sims):
            # Deriving empirical vector of means and an empirical covariance matrix
            mu_simulation, cov_simulation = self._simulate_covariance(
                mu_vec, cov, num_obs, lw_shrinkage)

            # If goal is minimum variance
            if min_var_portf:
                mu_simulation = None

            # De-noising covariance matrix
            if kde_bwidth > 0:
                cov_simulation = self.risk_estimators.denoise_covariance(
                    cov_simulation, num_obs / cov_simulation.shape[1],
                    kde_bwidth)

            # Writing the results to corresponding dataframes
            w_cvo.loc[simulation] = self.allocate_cvo(cov_simulation,
                                                      mu_simulation).flatten()
            w_nco.loc[simulation] = self.allocate_nco(
                cov_simulation, mu_simulation,
                int(cov_simulation.shape[0] / 2)).flatten()

        return w_cvo, w_nco

    def estim_errors_mcos(self, w_cvo, w_nco, mu_vec, cov, min_var_portf=True):
        """
        Computes the true optimal allocation w, and compares that result with the estimated ones by MCOS.

        The result is the mean standard deviation between the true weights and the ones obtained from the simulation
        for each algorithm - CVO and NCO.

        :param w_cvo: (pd.DataFrame) DataFrame with weights from the CVO algorithm.
        :param w_nco: (pd.DataFrame) DataFrame with weights from the NCO algorithm.
        :param mu_vec: (np.array) The original vector of expected outcomes.
        :param cov: (np.array) The original covariance matrix of outcomes.
        :param min_var_portf: (bool) When True, the minimum variance solution was computed. Otherwise, the
                                     maximum Sharpe ratio solution was computed. (True by default)
        :return: (float, float) Mean standard deviation of weights for CVO and NCO algorithms.
        """

        # Calculating the true optimal weights allocation
        w_true = self.allocate_cvo(cov, None if min_var_portf else mu_vec)
        w_true = np.repeat(w_true.T, w_cvo.shape[0], axis=0)

        # Mean standard deviation between the weights from CVO algorithm and the true weights
        err_cvo = (w_cvo - w_true).std(axis=0).mean()

        # Mean standard deviation between the weights from NCO algorithm and the true weights
        err_nco = (w_nco - w_true).std(axis=0).mean()

        return err_cvo, err_nco

    @staticmethod
    def _simulate_covariance(mu_vector,
                             cov_matrix,
                             num_obs,
                             lw_shrinkage=False):
        """
        Derives an empirical vector of means and an empirical covariance matrix.

        Based on the set of true means vector and covariance matrix of X distributions,
        the function generates num_obs observations for every X.
        Based on these observations simulated vector of means and the simulated covariance
        matrix are obtained.

        :param mu_vector: (np.array) True means vector for X distributions
        :param cov_matrix: (np.array) True covariance matrix for X distributions
        :param num_obs: (int) Number of observations to draw for every X
        :param lw_shrinkage: (bool) Flag to apply Ledoit-Wolf shrinkage to X (False by default)
        :return: (np.array, np.array) Empirical means vector, empirical covariance matrix
        """

        # Generating a matrix of num_obs observations for X distributions
        observations = np.random.multivariate_normal(mu_vector.flatten(),
                                                     cov_matrix,
                                                     size=num_obs)

        # Empirical means vector calculation
        mu_simulated = observations.mean(axis=0).reshape(-1, 1)

        if lw_shrinkage:  # If applying Ledoit-Wolf shrinkage
            cov_simulated = LedoitWolf().fit(observations).covariance_

        else:  # Simple empirical covariance matrix
            cov_simulated = np.cov(observations, rowvar=False)

        return mu_simulated, cov_simulated

    @staticmethod
    def _cluster_kmeans_base(corr, max_num_clusters=None, n_init=10):
        """
        Finds the optimal partition of clusters using the K-Means algorithm.

        The K-Means algorithm is fit on a matrix of distances derived from the correlation matrix.
        It runs n_init times, initialising K-Means with different centroid seeds for every candidate
        number of clusters up to max_num_clusters.

        The Silhouette Coefficient is used as a measure of how well each sample is matched to its
        own cluster compared to the other clusters.

        :param corr: (pd.DataFrame) DataFrame with correlation matrix
        :param max_num_clusters: (int) Maximum allowed number of clusters. If None then taken as num_elements/2
        :param n_init: (int) Number of times the k-means algorithm will run with different centroid seeds (default 10)
        :return: (np.array, dict, pd.Series) Correlation matrix of clustered elements, dict with clusters,
                                             Silhouette Coefficient series
        """

        # Distance matrix from correlation matrix
        dist_matrix = ((1 - corr.fillna(0)) / 2)**(1 / 2)

        # Series for Silhouette Coefficients - cluster fit measure
        silh_coef_optimal = pd.Series(dtype='float64')

        # If maximum number of clusters undefined, it's equal to half the number of elements
        if max_num_clusters is None:
            max_num_clusters = ceil(corr.shape[0] / 2)

        # Iterating over the allowed iteration times for k-means
        for init in range(1, n_init + 1):
            # Iterating through every number of clusters
            for num_clusters in range(2, max_num_clusters + 1):
                # Computing k-means clustering
                kmeans = KMeans(n_clusters=num_clusters, n_init=init)
                kmeans = kmeans.fit(dist_matrix)

                # Computing a Silhouette Coefficient - cluster fit measure
                silh_coef = silhouette_samples(dist_matrix, kmeans.labels_)

                # Metrics to compare numbers of clusters
                stat = (silh_coef.mean() / silh_coef.std(),
                        silh_coef_optimal.mean() / silh_coef_optimal.std())

                # If this is the first metric or better than the previous
                # we set it as the optimal number of clusters
                if np.isnan(stat[1]) or stat[0] > stat[1]:
                    silh_coef_optimal = silh_coef
                    kmeans_optimal = kmeans

        # Sorting labels of clusters
        new_index = np.argsort(kmeans_optimal.labels_)

        # Reordering correlation matrix rows
        corr = corr.iloc[new_index]

        # Reordering correlation matrix columns
        corr = corr.iloc[:, new_index]

        # Preparing cluster members as dict
        clusters = {i: corr.columns[np.where(kmeans_optimal.labels_ == i)[0]].tolist() for \
                    i in np.unique(kmeans_optimal.labels_)}

        # Silhouette Coefficient series
        silh_coef_optimal = pd.Series(silh_coef_optimal,
                                      index=dist_matrix.index)

        return corr, clusters, silh_coef_optimal

    @staticmethod
    def _form_block_matrix(num_blocks, block_size, block_corr):
        """
        Creates a correlation matrix in a block form with given parameters.

        :param num_blocks: (int) Number of blocks in matrix
        :param block_size: (int) Size of a single block
        :param block_corr: (float) Correlation of elements in a block
        :return: (np.array) Resulting correlation matrix
        """

        # Creating a single block with all elements as block_corr
        block = np.ones((block_size, block_size)) * block_corr

        # Setting the main diagonal to ones
        block[range(block_size), range(block_size)] = 1

        # Create a block diagonal matrix with a number of equal blocks
        res_matrix = block_diag(*([block] * num_blocks))

        return res_matrix

    def form_true_matrix(self, num_blocks, block_size, block_corr, std=None):
        """
        Creates a random vector of means and a random covariance matrix.

        Due to the block structure of a matrix, it is a good sample data to use in the NCO and MCOS algorithms.

        The number of assets in a portfolio, number of blocks and correlations
        both inside the cluster and between clusters are adjustable.

        :param num_blocks: (int) Number of blocks in matrix
        :param block_size: (int) Size of a single block
        :param block_corr: (float) Correlation of elements in a block
        :param std: (float) Standard deviation of the elements. If None, random values are drawn from the uniform dist [0.05, 0.2]
        :return: (np.array, pd.DataFrame) Resulting vector of means and the dataframe with covariance matrix
        """

        # Creating a block correlation matrix
        corr_matrix = self._form_block_matrix(num_blocks, block_size,
                                              block_corr)

        # Transforming to DataFrame
        corr_matrix = pd.DataFrame(corr_matrix)

        # Getting columns of matrix separately
        columns = corr_matrix.columns.tolist()

        # Randomizing the order of the columns
        np.random.shuffle(columns)
        corr_matrix = corr_matrix[columns].loc[columns].copy(deep=True)

        if std is None:  # Draw random standard deviations from the uniform dist [0.05, 0.2]
            std = np.random.uniform(.05, .2, corr_matrix.shape[0])
        else:  # Or the ones set by user
            std = np.array([std] * corr_matrix.shape[1])

        # Calculating covariance matrix from the generated correlation matrix
        cov_matrix = self.risk_estimators.corr_to_cov(corr_matrix, std)

        # Vector of means
        mu_vec = np.random.normal(std, std, cov_matrix.shape[0]).reshape(-1, 1)

        return mu_vec, cov_matrix
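
A minimal sketch of how the pieces above compose, using the sample-data generator from the same class; the block sizes, observation count and simulation count are illustrative.

nco = NestedClusteredOptimisation()

# Ground-truth means and a block-structured covariance matrix
mu_vec, cov_matrix = nco.form_true_matrix(num_blocks=2, block_size=5, block_corr=0.5)

# Monte Carlo comparison of the CVO and NCO estimators
w_cvo, w_nco = nco.allocate_mcos(mu_vec, cov_matrix.values, num_obs=50,
                                 num_sims=100, min_var_portf=True)
err_cvo, err_nco = nco.estim_errors_mcos(w_cvo, w_nco, mu_vec, cov_matrix.values)
print(err_cvo, err_nco)  # per the paper, NCO is expected to show the smaller error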
Exemple #27
0
class HierarchicalRiskParity:
    """
    This class implements the Hierarchical Risk Parity algorithm mentioned in the following paper: `López de Prado, Marcos,
    Building Diversified Portfolios that Outperform Out-of-Sample (May 23, 2016). Journal of Portfolio Management,
    2016 <https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2708678>`_. The code is reproduced with modification from his book:
    Advances in Financial Machine Learning, Chapter 16.

    By replacing the exact analytical approach to the calculation of weights with an approximate machine learning based
    approach (hierarchical tree-clustering), Hierarchical Risk Parity produces weights which are stable to random shocks in the
    stock market. Moreover, previous algorithms like CLA involve the inversion of the covariance matrix, which is a highly
    unstable operation that tends to have a major impact on performance due to slight changes in the covariance matrix. By
    removing any dependence on the inversion of the covariance matrix, the Hierarchical Risk Parity algorithm is fast, robust
    and flexible.
    """
    def __init__(self):
        self.weights = list()
        self.seriated_distances = None
        self.seriated_correlations = None
        self.ordered_indices = None
        self.clusters = None
        self.returns_estimator = ReturnsEstimators()
        self.risk_metrics = RiskMetrics()
        self.risk_estimator = RiskEstimators()

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 distance_matrix=None,
                 side_weights=None,
                 linkage='single'):
        # pylint: disable=invalid-name, too-many-branches
        """
        Calculate asset allocations using HRP algorithm.

        :param asset_names: (list) A list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) A dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.Dataframe/numpy matrix) User supplied matrix of asset returns
        :param covariance_matrix: (pd.Dataframe/numpy matrix) User supplied covariance matrix of asset returns
        :param distance_matrix: (pd.Dataframe/numpy matrix) User supplied distance matrix
        :param side_weights: (pd.Series/numpy matrix) With asset_names in index and value 1 for Buy, -1 for Sell
                                                      (default 1 for all)
        :param linkage: (string) Type of linkage used for Hierarchical Clustering. Supported strings - ``single``,
                                 ``average``, ``complete``, ``ward``.
        """

        # Perform error checks
        self._error_checks(asset_prices, asset_returns, covariance_matrix)

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(
                    asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            elif covariance_matrix is not None and isinstance(
                    covariance_matrix, pd.DataFrame):
                asset_names = covariance_matrix.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None and covariance_matrix is None:
            asset_returns = self.returns_estimator.calculate_returns(
                asset_prices=asset_prices)
        asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            covariance_matrix = asset_returns.cov()
        covariance_matrix = pd.DataFrame(covariance_matrix,
                                         index=asset_names,
                                         columns=asset_names)
        covariance_matrix = self._nan_and_diagonal_checks(covariance_matrix,
                                                          nan_fill_value=0)

        # Calculate correlation matrix from the covariance
        correlation_matrix = self.risk_estimator.cov_to_corr(covariance_matrix)
        correlation_matrix = self._nan_and_diagonal_checks(
            correlation_matrix, nan_fill_value=0, diagonal_fill_value=1)

        # Calculate the distance matrix or use a custom one
        if distance_matrix is None:
            distance_matrix = np.sqrt((1 - correlation_matrix).round(5) / 2)
        distance_matrix = pd.DataFrame(distance_matrix,
                                       index=asset_names,
                                       columns=asset_names)
        distance_matrix = self._nan_and_diagonal_checks(distance_matrix,
                                                        nan_fill_value=0,
                                                        diagonal_fill_value=0)

        # Step-1: Tree Clustering
        self.clusters = self._tree_clustering(distance=distance_matrix,
                                              method=linkage)

        # Step-2: Quasi Diagnalization
        num_assets = len(asset_names)
        self.ordered_indices = self._quasi_diagnalization(
            num_assets, 2 * num_assets - 2)
        self.seriated_distances, self.seriated_correlations = self._get_seriated_matrix(
            assets=asset_names,
            distance=distance_matrix,
            correlation=correlation_matrix)

        # Step-3: Recursive Bisection
        self._recursive_bisection(covariance=covariance_matrix,
                                  assets=asset_names)

        # Build Long/Short portfolio
        if side_weights is None:
            side_weights = pd.Series([1] * num_assets, index=asset_names)
        side_weights = pd.Series(side_weights, index=asset_names)
        self._build_long_short_portfolio(side_weights)

    def plot_clusters(self, assets):
        """
        Plot a dendrogram of the hierarchical clusters.

        :param assets: (list) Asset names in the portfolio
        :return: (dict) Dendrogram
        """

        dendrogram_plot = dendrogram(self.clusters, labels=assets)
        return dendrogram_plot

    @staticmethod
    def _nan_and_diagonal_checks(matrix,
                                 nan_fill_value=0,
                                 diagonal_fill_value=None):
        """
        Check for any NaN values in the matrix and discrepancies in the diagonal values.

        :param matrix: (pd.DataFrame) The matrix which needs to be processed.
        :param nan_fill_value: (float) Replacement value for NaNs
        :param diagonal_fill_value: (float) The values to use for filling the diagonal.
        :return: (pd.DataFrame) Processed matrix.
        """

        matrix = matrix.fillna(nan_fill_value)
        if diagonal_fill_value is not None:
            np.fill_diagonal(matrix.values, val=diagonal_fill_value)
        return matrix

    @staticmethod
    def _tree_clustering(distance, method='single'):
        """
        Perform the traditional hierarchical tree clustering.

        :param distance: (pd.DataFrame) Distance matrix of the assets
        :param method: (str) The type of clustering to be done
        :return: (np.array) Hierarchical clusters (linkage matrix)
        """

        clusters = scipy_linkage(squareform(distance.values), method=method)
        return clusters

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        :param num_assets: (int) The total number of assets
        :param curr_index: (int) Current index
        :return: (list) The assets rearranged according to hierarchical clustering
        """

        if curr_index < num_assets:
            return [curr_index]

        left = int(self.clusters[curr_index - num_assets, 0])
        right = int(self.clusters[curr_index - num_assets, 1])

        return (self._quasi_diagnalization(num_assets, left) +
                self._quasi_diagnalization(num_assets, right))

    def _get_seriated_matrix(self, assets, distance, correlation):
        """
        Based on the quasi-diagnalization, reorder the original distance matrix, so that assets within
        the same cluster are grouped together.

        :param assets: (list) Asset names in the portfolio
        :param distance: (pd.Dataframe) Distance values between asset returns
        :param correlation: (pd.Dataframe) Correlations between asset returns
        :return: (np.array) Re-arranged distance matrix based on tree clusters
        """

        ordering = assets[self.ordered_indices]
        seriated_distances = distance.loc[ordering, ordering]
        seriated_correlations = correlation.loc[ordering, ordering]
        return seriated_distances, seriated_correlations

    def _build_long_short_portfolio(self, side_weights):
        """
        Adjust weights according to the shorting constraints specified.

        :param side_weights: (pd.Series/numpy matrix) With asset_names in index and value 1 for Buy, -1 for Sell
                                                      (default 1 for all)
        """

        short_ptf = side_weights[side_weights == -1].index
        buy_ptf = side_weights[side_weights == 1].index
        if len(short_ptf) > 0:
            # Short half size
            self.weights.loc[short_ptf] /= self.weights.loc[short_ptf].sum().values[0]
            self.weights.loc[short_ptf] *= -0.5

            # Buy other half
            self.weights.loc[buy_ptf] /= self.weights.loc[buy_ptf].sum().values[0]
            self.weights.loc[buy_ptf] *= 0.5
        self.weights = self.weights.T

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.Dataframe) Covariance matrix of assets
        :return: (list) Inverse variance weight values
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        parity_w = np.nan_to_num(parity_w)
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        :param covariance: (pd.Dataframe) Covariance matrix of assets
        :param cluster_indices: (list) Asset indices for the cluster
        :return: (float) Variance of the cluster
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(
            covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _recursive_bisection(self, covariance, assets):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        :param covariance: (pd.Dataframe) The covariance matrix
        :param assets: (list) Asset names in the portfolio
        """
        self.weights = pd.Series(1.0, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            clustered_alphas = [
                cluster[start:end]
                for cluster in clustered_alphas
                for start, end in ((0, len(cluster) // 2),
                                   (len(cluster) // 2, len(cluster)))
                if len(cluster) > 1
            ]

            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Get left and right cluster variances and calculate allocation factor
                left_cluster_variance = self._get_cluster_variance(
                    covariance, left_cluster)
                right_cluster_variance = self._get_cluster_variance(
                    covariance, right_cluster)
                alloc_factor = 1 - left_cluster_variance / (
                    left_cluster_variance + right_cluster_variance)
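                # Equivalently, alloc_factor = right_variance / (left_variance
                # + right_variance): the lower-variance cluster of the pair
                # receives the larger share of the parent weight.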

                # If the allocation factor cannot be calculated properly due to NaNs
                # in the data, split the allocation equally between the two clusters.
                if np.isnan(alloc_factor):
                    print("NaNs encountered while calculating allocation factor. "
                          "Replacing it with an equal weighting allocation scheme.")
                    alloc_factor = 0.5

                # Assign weights to each sub-cluster
                self.weights[left_cluster] *= alloc_factor
                self.weights[right_cluster] *= 1 - alloc_factor

        # Assign actual asset values to weight index
        self.weights.index = assets[self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)
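
    # Illustrative trace: with 4 ordered assets the bisection proceeds
    # [0, 1, 2, 3] -> ([0, 1], [2, 3]) -> ([0], [1], [2], [3]); at each split
    # the two sibling clusters share the parent weight in proportion
    # alloc_factor : (1 - alloc_factor), so every asset's final weight is the
    # product of the allocation factors along its path down the tree.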

    @staticmethod
    def _error_checks(asset_prices, asset_returns, covariance_matrix):
        """
        Perform initial error checks on the supplied inputs.

        :param asset_prices: (pd.DataFrame) A dataframe of historical asset prices (daily close)
                                            indexed by date.
        :param asset_returns: (pd.DataFrame/numpy matrix) User supplied matrix of asset returns.
        :param covariance_matrix: (pd.DataFrame/numpy matrix) User supplied covariance matrix of asset returns.
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError(
                "You need to supply either raw prices, returns, or a covariance matrix of asset returns"
            )

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError(
                    "Asset prices dataframe must be indexed by date.")
Exemple #28
0
    def test_shrinked_covariance(self):
        """
        Test the calculation of the Covariance estimator with shrinkage.
        """

        risk_estimators = RiskEstimators()

        # Getting first three columns of data to be able to compare the output
        prices_dataframe = self.data.iloc[:, :3]
        returns_dataframe = self.returns.iloc[:, :3]

        # Expected resulting Covariance estimators for each shrinkage type
        expected_cov_basic = np.array(
            [[4.47705356e-04, 3.14668132e-04, -1.49635474e-05],
             [3.14668132e-04, 3.63299625e-04, -1.34611717e-05],
             [-1.49635474e-05, -1.34611717e-05, 4.58764444e-05]])

        expected_cov_lw = np.array(
            [[4.63253312e-04, 3.44853842e-04, -1.63989814e-05],
             [3.44853842e-04, 3.70750646e-04, -1.47524847e-05],
             [-1.63989814e-05, -1.47524847e-05, 2.28774674e-05]])

        expected_cov_oas = np.array(
            [[4.65398835e-04, 3.49019287e-04, -1.65970625e-05],
             [3.49019287e-04, 3.71778842e-04, -1.49306780e-05],
             [-1.65970625e-05, -1.49306780e-05, 1.97037481e-05]])

        # Using the Covariance estimator with different types of shrinkage on price data
        shrinked_cov_basic = risk_estimators.shrinked_covariance(
            prices_dataframe,
            price_data=True,
            shrinkage_type='basic',
            basic_shrinkage=0.1)

        shrinked_cov_lw = risk_estimators.shrinked_covariance(
            prices_dataframe, price_data=True, shrinkage_type='lw')

        shrinked_cov_oas = risk_estimators.shrinked_covariance(
            prices_dataframe, price_data=True, shrinkage_type='oas')

        shrinked_cov_all = risk_estimators.shrinked_covariance(
            prices_dataframe,
            price_data=True,
            shrinkage_type='all',
            basic_shrinkage=0.1)

        # Using the Covariance estimator with different types of shrinkage on returns data
        shrinked_cov_basic_ret = risk_estimators.shrinked_covariance(
            returns_dataframe,
            price_data=False,
            shrinkage_type='basic',
            basic_shrinkage=0.1)

        # Testing if the resulting shrinked covariance matrix is right for every method
        np.testing.assert_almost_equal(shrinked_cov_basic,
                                       expected_cov_basic,
                                       decimal=7)
        np.testing.assert_almost_equal(shrinked_cov_lw,
                                       expected_cov_lw,
                                       decimal=7)
        np.testing.assert_almost_equal(shrinked_cov_oas,
                                       expected_cov_oas,
                                       decimal=7)

        # And that the results from all methods match the individual methods results
        np.testing.assert_almost_equal(shrinked_cov_all[0],
                                       shrinked_cov_basic,
                                       decimal=7)
        np.testing.assert_almost_equal(shrinked_cov_all[1],
                                       shrinked_cov_lw,
                                       decimal=7)
        np.testing.assert_almost_equal(shrinked_cov_all[2],
                                       shrinked_cov_oas,
                                       decimal=7)

        # And if the results for price and returns are the same
        np.testing.assert_almost_equal(shrinked_cov_basic,
                                       shrinked_cov_basic_ret,
                                       decimal=4)
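
A minimal sketch of what the 'basic' shrinkage tested above typically computes, assuming the sklearn-style convention of shrinking the sample covariance towards a scaled identity target. The helper name and the use of np.cov are illustrative assumptions, not part of RiskEstimators; delta plays the role of the basic_shrinkage parameter in the test.

import numpy as np

def basic_shrinkage_sketch(returns, delta=0.1):
    """Convex combination of the sample covariance and a scaled identity."""
    # Sample covariance of the returns (rows are observations). This is an
    # illustrative stand-in, not the library's internal estimator.
    cov = np.cov(returns, rowvar=False)
    num_assets = cov.shape[0]
    # Shrinkage target: identity scaled by the average variance, mu = trace / n.
    mu = np.trace(cov) / num_assets
    # Mix the sample estimate with the target using the shrinkage intensity.
    return (1 - delta) * cov + delta * mu * np.identity(num_assets)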