def test_corrpsd_threshold():
    """Check that the smallest eigenvalue lands on the requested threshold.

    For every threshold the indefinite test matrix is corrected by four
    routines, in this order: ``corr_nearest``, ``corr_clipped``,
    ``cov_nearest(method='nearest')`` and ``cov_nearest`` with its default
    method.  The two "nearest" variants are checked with ``rtol=1e-6``; the
    other two with the looser ``rtol=0.25``.
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # (correction routine, relative tolerance) pairs, in evaluation order.
    checks = (
        (lambda thr: corr_nearest(x, n_fact=100, threshold=thr), 1e-6),
        (lambda thr: corr_clipped(x, threshold=thr), 0.25),
        (lambda thr: cov_nearest(x, method='nearest', n_fact=100,
                                 threshold=thr), 1e-6),
        (lambda thr: cov_nearest(x, n_fact=100, threshold=thr), 0.25),
    )

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        for correct, rel_tol in checks:
            evals = np.linalg.eigvalsh(correct(threshold))
            # eigvalsh returns eigenvalues in ascending order, so index 0
            # is the smallest one.
            assert_allclose(evals[0], threshold, rtol=rel_tol, atol=1e-15)
def _find_positive_definite(cov, max_iter=100):
    """Find the nearest positive definite matrix.

    ``cov`` is returned untouched when all of its eigenvalues are strictly
    positive.  Otherwise ``corr_nearest`` is applied repeatedly, each time to
    the previous result, until a matrix with strictly positive eigenvalues
    is obtained.

    :param cov: Symmetric matrix to check and, if needed, correct.
    :param max_iter: Upper bound on the number of ``corr_nearest`` passes.
    :return: ``cov`` itself if it is already positive definite, otherwise the
        first corrected matrix whose eigenvalues are all strictly positive.
    :raises ValueError: If no positive definite matrix is found within
        ``max_iter`` passes.
    """
    if np.all(np.linalg.eigvalsh(cov) > 0):
        return cov

    candidate = cov
    for _ in range(max_iter):
        # Feed the previous result back in: re-running corr_nearest on the
        # unchanged input would yield the same matrix every time and the
        # loop could never terminate.
        candidate = corr_nearest(candidate)
        if np.all(np.linalg.eigvalsh(candidate) > 0):
            return candidate

    raise ValueError(
        "Could not find a positive definite matrix within "
        "{} iterations".format(max_iter))
def test_nearest(self):
    """Compare ``corr_nearest`` output against the stored reference results.

    Checks the corrected matrix, its distance to the input as measured by
    ``norm_f``, the full eigenvalue spectrum (the reference eigenvalues are
    reversed before comparison) and finally that the smallest eigenvalue
    equals the requested threshold of ``1e-7``.
    """
    x, res_r = self.x, self.res

    nearest = corr_nearest(x, threshold=1e-7, n_fact=100)
    assert_almost_equal(nearest, res_r.mat, decimal=3)

    assert_allclose(norm_f(x, nearest), res_r.normF, rtol=0.0015)

    evals = np.linalg.eigvalsh(nearest)
    assert_allclose(evals, res_r.eigenvalues[::-1], rtol=0.003, atol=1e-7)

    # eigvalsh sorts ascending, so the first entry is the smallest eigenvalue.
    assert_allclose(evals[0], 1e-7, rtol=1e-6)
def make_psd(self) -> "correlationEstimate":
    """Build an estimate whose populated block has gone through ``corr_nearest``.

    The sub-matrix of assets that have data is converted to a numpy array
    and passed to ``corr_nearest`` with ``n_fact=10``.  The result is wrapped
    in a new ``correlationEstimate`` and the assets with missing data are
    then put back via ``add_assets_with_nan_values``.
    """
    populated_assets = self.assets_with_data()
    missing_assets = self.assets_with_missing_data()

    corrected_values = corr_nearest(
        self.subset(populated_assets).as_np(), n_fact=10)

    corrected_estimate = correlationEstimate(
        values=corrected_values,
        columns=self.assets_with_data(),
    )
    return corrected_estimate.add_assets_with_nan_values(missing_assets)
def test_corr_psd():
    """A positive definite input must come back unchanged (to 14 decimals)."""
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    # Same input through each routine, in the order nearest, clipped, cov.
    routines = (
        lambda mat: corr_nearest(mat, n_fact=100),
        corr_clipped,
        lambda mat: cov_nearest(mat, n_fact=100),
    )
    for routine in routines:
        assert_almost_equal(x, routine(x), decimal=14)

    # Repeat the covariance check with a slightly inflated diagonal.
    x2 = x + 0.001 * np.eye(3)
    assert_almost_equal(x2, cov_nearest(x2, n_fact=100), decimal=14)
def test_corrpsd_threshold(threshold):
    """Check that each correction routine puts the smallest eigenvalue at ``threshold``.

    ``threshold`` is an argument of the test (presumably supplied by a
    parametrization outside this block -- confirm against the decorator).
    ``corr_nearest`` and ``cov_nearest(method='nearest')`` are held to
    ``rtol=1e-6``; ``corr_clipped`` and default ``cov_nearest`` to
    ``rtol=0.25``.
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    def smallest_eigenvalue(mat):
        # eigvalsh returns eigenvalues in ascending order.
        return np.linalg.eigvalsh(mat)[0]

    nearest = corr_nearest(x, n_fact=100, threshold=threshold)
    assert_allclose(smallest_eigenvalue(nearest), threshold,
                    rtol=1e-6, atol=1e-15)

    clipped = corr_clipped(x, threshold=threshold)
    assert_allclose(smallest_eigenvalue(clipped), threshold,
                    rtol=0.25, atol=1e-15)

    cov_via_nearest = cov_nearest(x, method='nearest', n_fact=100,
                                  threshold=threshold)
    assert_allclose(smallest_eigenvalue(cov_via_nearest), threshold,
                    rtol=1e-6, atol=1e-15)

    cov_default = cov_nearest(x, n_fact=100, threshold=threshold)
    assert_allclose(smallest_eigenvalue(cov_default), threshold,
                    rtol=0.25, atol=1e-15)
from statsmodels.stats.correlation_tools import (
    corr_nearest, corr_clipped, cov_nearest)

# Example script: print the eigenvalues of an indefinite matrix before and
# after it is corrected by corr_nearest / corr_clipped.
examples = ['all']

if 'all' in examples:
    # x0 is positive definite
    x0 = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    # x has negative eigenvalues, not definite
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])
    # Alternative input for experimentation:
    #x = np.array([[1, 0.2, 0.2], [0.2, 1, 0.2], [0.2, 0.2, 1]])

    n_fact = 2

    print('evals original', np.linalg.eigvalsh(x))

    # (label, routine, keyword arguments), run and reported in this order.
    for label, routine, options in (
            ('evals nearest', corr_nearest, {'n_fact': 100}),
            ('evals nearest', corr_nearest,
             {'n_fact': 100, 'threshold': 1e-16}),
            ('evals clipped', corr_clipped, {'threshold': 1e-16}),
    ):
        y = routine(x, **options)
        print(label, np.linalg.eigvalsh(y))
        print(y)

    np.set_printoptions(precision=4)

    print('\nMini Monte Carlo')
    # we are simulating a uniformly distributed symmetric matrix
    # and find close positive definite matrix
from statsmodels.stats.correlation_tools import (
    corr_nearest, corr_clipped, cov_nearest)

# Example script: print the eigenvalues of an indefinite matrix before and
# after it is corrected by corr_nearest / corr_clipped.
# NOTE: output uses the print() function; the former Python 2 print
# statements are a SyntaxError on Python 3.
examples = ['all']

if 'all' in examples:
    # x0 is positive definite
    x0 = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    # x has negative eigenvalues, not definite
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])
    # Alternative input for experimentation:
    #x = np.array([[1, 0.2, 0.2], [0.2, 1, 0.2], [0.2, 0.2, 1]])

    n_fact = 2

    print('evals original', np.linalg.eigvalsh(x))

    # Nearest correlation matrix with the default threshold.
    y = corr_nearest(x, n_fact=100)
    print('evals nearest', np.linalg.eigvalsh(y))
    print(y)

    # Same routine with an explicit, very small eigenvalue threshold.
    y = corr_nearest(x, n_fact=100, threshold=1e-16)
    print('evals nearest', np.linalg.eigvalsh(y))
    print(y)

    # Clipping-based correction with the same threshold for comparison.
    y = corr_clipped(x, threshold=1e-16)
    print('evals clipped', np.linalg.eigvalsh(y))
    print(y)

    np.set_printoptions(precision=4)

    print('\nMini Monte Carlo')
    # we are simulating a uniformly distributed symmetric matrix
    # and find close positive definite matrix
def sample_from_corrgan(model_loc, dim=10, n_samples=1):  # pylint: disable=import-outside-toplevel, disable=too-many-locals
    """
    Samples correlation matrices from the pre-trained CorrGAN network.

    It is reproduced with modifications from the following paper:
    `Marti, G., 2020, May. CorrGAN: Sampling Realistic Financial Correlation Matrices Using
    Generative Adversarial Networks. In ICASSP 2020-2020 IEEE International Conference on
    Acoustics, Speech and Signal Processing (ICASSP) (pp. 8459-8463). IEEE.
    <https://arxiv.org/pdf/1910.09504.pdf>`_

    It loads the appropriate CorrGAN model for the required dimension. Generates a matrix output
    from this network. Symmetrizes this matrix and finds the nearest correlation matrix
    that is positive semi-definite. Finally, it maximizes the sum of the similarities between
    adjacent leaves to arrange it with hierarchical clustering.

    The CorrGAN network was trained on the correlation profiles of the S&P 500 stocks. Therefore
    the output retains these properties. In addition, the final output retains the following
    6 stylized facts:

    1. Distribution of pairwise correlations is significantly shifted to the positive.

    2. Eigenvalues follow the Marchenko-Pastur distribution, but for a very large first
       eigenvalue (the market).

    3. Eigenvalues follow the Marchenko-Pastur distribution, but for a couple of other
       large eigenvalues (industries).

    4. Perron-Frobenius property (first eigenvector has positive entries).

    5. Hierarchical structure of correlations.

    6. Scale-free property of the corresponding Minimum Spanning Tree (MST).

    :param model_loc: (str) Location of folder containing CorrGAN models.
    :param dim: (int) Dimension of correlation matrix to sample.
        In the range [2, 200].
    :param n_samples: (int) Number of samples to generate.
    :return: (np.array) Sampled correlation matrices of shape (n_samples, dim, dim).
    :raises ValueError: If ``dim`` is outside [2, 200], or if no generator folder
        in ``model_loc`` covers the requested dimension.
    """
    # Validate dimension first so bad input fails before the costly TensorFlow import.
    if not 1 < dim <= 200:
        raise ValueError("Dimension not supported, {}".format(dim))

    # Import here needed to prevent unnecessary imports in other parts of code.
    import tensorflow as tf

    # Resulting correlation matrices.
    nearest_corr_mats = []

    # Load generator model closest to the required dimension by looking at the models folder.
    # Sub-folders are expected to be named like "generator_<N>d"; the trailing
    # character is stripped before converting <N> to an int.
    dimension_from_folder = [
        int(f.split("_")[1][:-1])
        for f in listdir(model_loc)
        if not path.isfile(path.join(model_loc, f))
    ]
    all_generator_dimensions = np.sort(dimension_from_folder)

    # Smallest available generator that is at least as large as `dim`.
    closest_dimension = next(
        (d for d in all_generator_dimensions if d >= dim), None)
    if closest_dimension is None:
        # Raise a descriptive error instead of leaking a bare StopIteration.
        raise ValueError(
            "No CorrGAN generator for dimension {} found in {}".format(
                dim, model_loc))

    # Load model.
    generator = tf.keras.models.load_model(
        "{}/generator_{}d".format(model_loc, closest_dimension),
        compile=False)

    # Sample from generator. Input dimension based on network.
    noise_dim = generator.layers[0].input_shape[1]
    noise = tf.random.normal([n_samples, noise_dim])
    generated_mat = generator(noise, training=False)

    # Get the indices of an upper triangular matrix.
    tri_rows, tri_cols = np.triu_indices(dim, k=1)

    # For each sample generated, make them strict correlation matrices
    # by projecting them on the nearest correlation matrix using Higham's
    # alternating projections method.
    for i in range(n_samples):
        # Grab only the required dimensions from generated matrix.
        corr_mat = np.array(generated_mat[i, :dim, :dim, 0])

        # Set diagonal to 1 and symmetrize (copy upper triangle into lower).
        np.fill_diagonal(corr_mat, 1)
        corr_mat[tri_cols, tri_rows] = corr_mat[tri_rows, tri_cols]

        # Get nearest correlation matrix that is positive semi-definite.
        nearest_corr_mat = corr_nearest(corr_mat)

        # Set diagonal to 1 and symmetrize.
        np.fill_diagonal(nearest_corr_mat, 1)
        nearest_corr_mat[tri_cols, tri_rows] = nearest_corr_mat[tri_rows, tri_cols]

        # Arrange with hierarchical clustering by maximizing the sum of the
        # similarities between adjacent leaves.
        dist = 1 - nearest_corr_mat
        linkage_mat = hierarchy.linkage(dist[tri_rows, tri_cols], method="ward")
        optimal_leaves = hierarchy.optimal_leaf_ordering(
            linkage_mat, dist[tri_rows, tri_cols])
        optimal_ordering = hierarchy.leaves_list(optimal_leaves)

        # Reorder rows and columns consistently.
        ordered_corr = nearest_corr_mat[optimal_ordering, :][:, optimal_ordering]
        nearest_corr_mats.append(ordered_corr)

    return np.array(nearest_corr_mats)