def compute_patterns(self, megdata=None, output='patterns'):
    """Derive spatial patterns from the trained filter weights.

    Fetches the de-mixing weights and the temporal filters from the
    session, optionally turns the weights into patterns, and stores the
    reshaped output-layer weights and biases on the instance.

    Parameters
    ----------
    megdata : object, optional
        Not referenced by this method.
    output : str
        When it contains 'patterns', the spatial weights are left-multiplied
        by the Ledoit-Wolf covariance of the input data. When it contains
        'full', the result is additionally right-multiplied by the inverse
        Ledoit-Wolf covariance of the projected data.
    """
    feed = {self.handle: self.train_handle, self.rate: 1}

    spatial = self.sess.run(self.demix.W, feed_dict=feed)
    temporal = self.sess.run(self.tconv1.filters, feed_dict=feed)
    self.filters = np.squeeze(temporal)
    self.patterns = spatial

    if 'patterns' in output:
        data = self.sess.run(self.X, feed_dict=feed)
        # Swap the last two axes, then flatten everything but the last axis.
        data = data.transpose([0, 2, 1])
        data = data.reshape([-1, data.shape[-1]])
        self.dcov = ledoit_wolf(data)[0]
        self.patterns = np.dot(self.dcov, self.patterns)

    if 'full' in output:
        # NOTE(review): `data` only exists if the 'patterns' branch ran, so
        # `output` must contain both substrings for this to work.
        latent_cov = ledoit_wolf(np.dot(data, spatial))[0]
        self.lat_prec = np.linalg.inv(latent_cov)
        self.patterns = np.dot(self.patterns, self.lat_prec)

    weights, biases = self.sess.run([self.fin_fc.w, self.fin_fc.b],
                                    feed_dict=feed)
    self.out_biases = biases
    self.out_weights = np.reshape(
        weights, [self.specs['n_ls'], -1, self.n_classes])
def getSigma(datas, method='Simple'):
    """Blend four per-quartile covariance estimates into one matrix.

    The rows of ``datas`` are split, in order, into four equally sized
    groups with ``pd.qcut``. A covariance is estimated on each group and
    the four estimates are combined with weights 0.1, 0.2, 0.3 and 0.4
    (first group to last).

    Parameters
    ----------
    datas : pandas.DataFrame
        One column per asset. The frame is NOT modified.
    method : {'Simple', 'Ledoit', 'DW'}
        'Simple' uses ``DataFrame.cov``; 'Ledoit' uses ``ledoit_wolf``;
        'DW' first replaces every positive value with 0 and then uses
        ``DataFrame.cov``.

    Returns
    -------
    pandas.DataFrame
        The blended covariance. For 'Ledoit' the frame carries default
        integer labels, as before.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method not in ('Simple', 'Ledoit', 'DW'):
        raise ValueError('Unknown covariance method: {!r}'.format(method))

    asset = datas.columns
    # Work on a copy: the helper grouping and the 'DW' clipping must not
    # leak into the caller's frame.
    frame = datas.copy()
    if method == 'DW':
        frame[frame > 0] = 0

    quartile = pd.qcut(np.arange(frame.shape[0]), 4, labels=False)
    chunks = [frame.loc[quartile == q, asset] for q in range(4)]

    if method == 'Ledoit':
        parts = [ledoit_wolf(chunk)[0] for chunk in chunks]
    else:
        parts = [chunk.cov() for chunk in chunks]

    sigma = 0.1 * parts[0]
    for weight, part in zip((0.2, 0.3, 0.4), parts[1:]):
        sigma = sigma + part * weight

    if method == 'Ledoit':
        sigma = pd.DataFrame(sigma)
    return sigma
def calculate_Sigma(X, method_name='SCE'):
    """Estimate a covariance matrix with the requested method.

    Parameters
    ----------
    X : array or pandas object
        Observations in rows. Pandas inputs are converted via ``.values``.
    method_name : str
        'SCE' (sample covariance), 'LWE' (Ledoit-Wolf), 'rie'/'RIE'
        (``rmt.optimalShrinkage``) or 'clipped' (``rmt.clipped``). When
        there are no more rows than columns, 'rie' falls back to the sample
        covariance and 'clipped' falls back to Ledoit-Wolf.

    Raises
    ------
    Exception
        If ``method_name`` is not recognised.
    """
    if "pandas" in str(type(X)):
        X = X.values

    if method_name == 'SCE':
        return np.cov(X, rowvar=False)

    if method_name == 'LWE':
        return sk_cov.ledoit_wolf(X)[0]

    if method_name in ('rie', 'RIE'):
        if X.shape[0] <= X.shape[1]:
            return np.cov(X, rowvar=False)
        return rmt.optimalShrinkage(X, return_covariance=True)

    if method_name == 'clipped':
        if X.shape[0] <= X.shape[1]:
            return sk_cov.ledoit_wolf(X)[0]
        return rmt.clipped(X, return_covariance=True)

    raise Exception(
        'Error occurred with method name: {}'.format(method_name))
def bhdist(mu1, mu2, mat1, mat2, cov_est=1):
    """Bhattacharyya distance between two samples, assuming normality.

    Parameters
    ----------
    mu1, mu2 : array-like
        Mean vectors of the two samples.
    mat1, mat2 : array-like
        Data of the two samples, used to estimate their covariances.
    cov_est : int
        0 = ``np.cov`` of the transposed data, 1 = Ledoit-Wolf,
        2 = ``diag_covmat``. Any other value leaves the covariances
        undefined and the function fails.

    Returns
    -------
    The scalar distance: a mean-difference term plus a log-determinant
    term.
    """
    mean_gap = np.matrix(mu1 - mu2).T

    if cov_est == 0:
        sigma_a = np.cov(np.matrix(mat1).T)
        sigma_b = np.cov(np.matrix(mat2).T)
    elif cov_est == 1:
        sigma_a = ledoit_wolf(mat1)[0]
        sigma_b = ledoit_wolf(mat2)[0]
    elif cov_est == 2:
        sigma_a = diag_covmat(mat1)
        sigma_b = diag_covmat(mat2)

    sigma_avg = (sigma_a + sigma_b) / 2
    # NOTE(review): the second argument 0 is forwarded to invcov_mah as its
    # cov_est setting -- confirm that is the intended mode for a matrix that
    # is already a covariance.
    precision_avg = invcov_mah(sigma_avg, 0)
    mean_term = np.dot(np.dot(mean_gap.T, precision_avg), mean_gap) / 8

    # Log-determinants are used instead of determinants; the sign returned
    # by slogdet is ignored.
    logdet_a = np.linalg.slogdet(sigma_a)[1]
    logdet_b = np.linalg.slogdet(sigma_b)[1]
    logdet_avg = np.linalg.slogdet(sigma_avg)[1]
    det_term = (logdet_avg / 2) - (logdet_a + logdet_b) / 4

    total = mean_term + det_term
    return total[0, 0]
def fit(self, X):
    """Fit a Ledoit-Wolf covariance on standardized features.

    The features are standardized with ``StandardScaler``, the shrunk
    covariance is estimated on the standardized data, and the estimate is
    rescaled by the scaler's per-feature ``scale_`` before being stored in
    ``self.covariance_``. Nothing is returned.
    """
    scaler = StandardScaler()
    standardized = scaler.fit_transform(X)
    shrunk = ledoit_wolf(standardized)[0]
    # Undo the standardization: entry (i, j) is multiplied by
    # scale_[i] * scale_[j].
    scale = scaler.scale_
    self.covariance_ = scale[:, np.newaxis] * shrunk * scale[np.newaxis, :]
def _get_omega(self, returns):
    """Get robust covariance matrix for use in Newton solver.

    Parameters
    ----------
    returns : numpy array
        Return data; the trailing ``self.corr_window`` and
        ``self.cov_window`` rows are used.

    Returns
    -------
    omega : array of shape n x n
        n is the number of securities involved. Every branch scales the
        estimate by 10**4.
    """
    corr_sample = returns[-self.corr_window:, :]
    cov_sample = returns[-self.cov_window:, :]
    scale = 10**4

    if self.cov_est == 'oas':
        return OAS().fit(corr_sample).covariance_ * scale

    if self.cov_est == 'empirical':
        return EmpiricalCovariance().fit(corr_sample).covariance_ * scale

    # Default: correlations from one window, volatilities from the other.
    corr = np.corrcoef(corr_sample, rowvar=False)
    vol = np.diag(np.sqrt(np.var(cov_sample, axis=0)))
    omega = vol @ corr @ vol

    if self.lw_shrink is None:
        # No fixed intensity configured: use the Ledoit-Wolf one.
        intensity = ledoit_wolf(corr_sample)[1]
    else:
        intensity = self.lw_shrink
    return shrunk_covariance(omega, shrinkage=intensity) * scale
def _cov(X, shrinkage=None): """Estimate covariance matrix (using optional shrinkage). Parameters ---------- X : array-like, shape (n_samples, n_features) Input data. shrinkage : string or float, optional Shrinkage parameter, possible values: - None or 'empirical': no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Returns ------- s : array, shape (n_features, n_features) Estimated covariance matrix. """ shrinkage = "empirical" if shrinkage is None else shrinkage if isinstance(shrinkage, str): if shrinkage == 'auto': sc = StandardScaler() # standardize features X = sc.fit_transform(X) s = ledoit_wolf(X)[0] # rescale s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :] elif shrinkage == 'empirical': s = empirical_covariance(X) else: raise ValueError('unknown shrinkage parameter') elif isinstance(shrinkage, float) or isinstance(shrinkage, int): if shrinkage < 0 or shrinkage > 1: raise ValueError('shrinkage parameter must be between 0 and 1') s = shrunk_covariance(empirical_covariance(X), shrinkage) else: raise TypeError('shrinkage must be of string or int type') return s
def get_scores_one_cluster(ftrain, ftest, food, shrunkcov=False):
    """Score test and OOD features against a single training cluster.

    Each score is the quadratic form ``(x - mean) @ pinv(cov) @ (x - mean)``
    where the mean and covariance are estimated from ``ftrain``.

    Parameters
    ----------
    ftrain, ftest, food : arrays with one row per sample
        Training, test and out-of-distribution features.
    shrunkcov : bool
        If true, use the Ledoit-Wolf covariance (and print a notice);
        otherwise use the biased sample covariance.

    Returns
    -------
    (dtest, dood) : one score per row of ``ftest`` and of ``food``.
    """
    if shrunkcov:
        print("Using ledoit-wolf covariance estimator.")
        train_cov = ledoit_wolf(ftrain)[0]
    else:
        train_cov = np.cov(ftrain.T, bias=True)

    center = np.mean(ftrain, axis=0, keepdims=True)
    precision = np.linalg.pinv(train_cov)

    def _quadratic_form(feats):
        offset = feats - center
        return np.sum(offset * precision.dot(offset.T).T, axis=-1)

    return _quadratic_form(ftest), _quadratic_form(food)
def var_info(mat1, mat2):
    """Variation-of-information style score from Ledoit-Wolf covariances.

    Covariances are estimated for ``mat1``, ``mat2`` and their column-wise
    concatenation. The result is ``H1 + H2 - 2 * MI`` where each ``H`` is
    ``0.5 * log((2*pi*e) ** n) + 0.5 * logdet`` with ``n = mat1.shape[1]``,
    and ``MI = 0.5 * (logdet1 + logdet2 - logdet_joint)``.
    """
    n = mat1.shape[1]
    joint = np.hstack([mat1, mat2])

    # Only the log-determinant is used; the sign from slogdet is dropped.
    logdet_joint, logdet_1, logdet_2 = (
        np.linalg.slogdet(ledoit_wolf(sample)[0])[1]
        for sample in (joint, mat1, mat2)
    )

    gauss_const = 0.5*np.log(np.power((2*np.exp(1)*np.pi), n))
    entropy_1 = gauss_const + 0.5*logdet_1
    entropy_2 = gauss_const + 0.5*logdet_2
    mutual_info = 0.5*(logdet_1 + logdet_2 - logdet_joint)
    return entropy_1 + entropy_2 - 2*mutual_info
def sk_ledoit_wolf(X):
    """Estimate covariance and inverse covariance with ``ledoit_wolf``.

    Prints a short banner, then returns ``(covariance, precision)`` where
    the precision is the plain matrix inverse of the shrunk covariance.
    """
    print("Ledoit-Wolf (sklearn)")
    shrunk_cov = ledoit_wolf(X)[0]
    return shrunk_cov, np.linalg.inv(shrunk_cov)
def rpw_ledoit_wolf(prices,
                    initial_weights=None,
                    risk_weights=None,
                    risk_parity_method='ccd',
                    maximum_iterations=100,
                    tolerance=1E-8,
                    min_assets_number=2,
                    max_assets_number=6):
    """
    Ledoit-Wolf covariance of the returns derived from ``prices``.

    NOTE(review): the signature mirrors a risk-parity weighting routine,
    but this body only computes and returns the covariance estimate; no
    weights are produced and every argument other than ``prices`` is
    ignored.

    Args:
        * prices (DataFrame): Prices for multiple securities.
        * initial_weights (list): Unused here.
        * risk_weights (list): Unused here.
        * risk_parity_method (str): Unused here.
        * maximum_iterations (int): Unused here.
        * tolerance (float): Unused here.
        * min_assets_number: Unused here.
        * max_assets_number: Unused here.

    Returns:
        The first element returned by ``ledoit_wolf`` (the shrunk
        covariance) for the non-missing returns.
    """
    returns = prices.to_returns().dropna()
    shrunk_cov, _ = ledoit_wolf(returns)
    return shrunk_cov
def likelihood(self, y_obs, y_sim):
    """Approximate likelihood of the observed data given simulations.

    Summary statistics are extracted from both data sets; the simulated
    statistics supply a mean and a Ledoit-Wolf based precision matrix,
    which are combined into a Gaussian-shaped density value for the
    observed statistics.

    Parameters
    ----------
    y_obs, y_sim : list
        Observed and simulated data sets.

    Raises
    ------
    TypeError
        If either argument is not a list.
    """
    if not isinstance(y_obs, list):
        raise TypeError('Observed data is not of allowed types')
    if not isinstance(y_sim, list):
        raise TypeError('simulated data is not of allowed types')

    # Observed statistics are cached and only recomputed when the observed
    # data set changes.
    stale = self.stat_obs is None or y_obs != self.data_set
    if stale:
        self.stat_obs = self.statistics_calc.statistics(y_obs)
        self.data_set = y_obs

    sim_stats = self.statistics_calc.statistics(y_sim)

    sim_mean = np.mean(sim_stats, 0)
    shrunk_cov = ledoit_wolf(sim_stats)[0]
    precision = np.linalg.inv(shrunk_cov)
    precision_det = np.linalg.det(precision)

    centred = self.stat_obs.reshape(-1, 1) - sim_mean.reshape(-1, 1)
    # NOTE(review): `*` is element-wise for plain ndarrays -- confirm the
    # intended product here.
    weighted = precision * np.array(centred).T
    per_row = np.sum(
        np.array(self.stat_obs - sim_mean) * np.array(weighted).T, axis=1)
    exponential = np.exp(np.sum(-0.5 * per_row))
    normaliser = pow(np.sqrt((1 / (2 * np.pi)) * precision_det),
                     self.stat_obs.shape[0])
    return exponential * normaliser
def sk_ledoit_wolf(X):
    """Estimate inverse covariance via scikit-learn ledoit_wolf function.

    Parameters
    ----------
    X : array-like
        Data passed straight to ``ledoit_wolf``.

    Returns
    -------
    (lw_cov_, lw_prec_)
        The shrunk covariance and its matrix inverse.
    """
    # print() with a single string argument behaves the same on Python 2
    # and Python 3; the statement form is a SyntaxError on Python 3.
    print('Ledoit-Wolf (sklearn)')
    lw_cov_, _ = ledoit_wolf(X)
    lw_prec_ = np.linalg.inv(lw_cov_)
    return lw_cov_, lw_prec_
def estimate(df, mean_est='equal_weights', cov_est='equal_weights',
             alpha=1e-10):
    """
    Estimate mean and covariance given historical data

    Parameters
    ----------
    df: pd.DataFrame (n.sample, n.feature)
        historical data
    mean_est: str
        method to estimate mean, one of
        {'equal_weights', 'exponential_weights', 'linear-weights'}
    cov_est: str
        method to estimate covariance, one of
        {'equal_weights', 'exponential_weights', 'ledoit_wolf', 'oas'}
    alpha: float
        smoothing factor handed to ``DataFrame.ewm`` by the exponential
        methods; must be a float even when those methods are not used

    Return
    ------
    mean, cov: np.array
        estimated mean (n.feature) and covariance (n.feature * n.feature)

    Raises
    ------
    TypeError
        if ``df`` is not a DataFrame or ``alpha`` is not a float
    ValueError
        if either method name is unknown
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError('Historical data must be data frame.')
    if not isinstance(alpha, float):
        raise TypeError('Parameter alpha must be float.')

    # --- mean ---
    if mean_est == 'equal_weights':
        mean = df.mean().values
    elif mean_est == 'exponential_weights':
        smoothed = df.ewm(alpha=alpha).mean()
        mean = smoothed.iloc[-1].values
    elif mean_est == 'linear-weights':
        # Row k (1-based) gets weight k, so later rows count more.
        ramp = np.array(range(1, df.shape[0] + 1))
        mean = df.values.T @ ramp / sum(ramp)
    else:
        raise ValueError('Method does not exist.')

    # --- covariance ---
    if cov_est == 'equal_weights':
        cov = df.cov().values
    elif cov_est == 'exponential_weights':
        n_features = df.shape[1]
        # The last n_features rows hold the most recent covariance block.
        cov = df.ewm(alpha=alpha).cov().iloc[-n_features:].values
    elif cov_est == 'ledoit_wolf':
        cov = ledoit_wolf(df)[0]
    elif cov_est == 'oas':
        cov = oas(df)[0]
    else:
        raise ValueError('Method does not exist.')

    return mean, cov
def _construct_mcca_gevp(Xs, regs=None, as_lists=False):
    r"""
    Constructs the matrices for the MCCA generalized eigenvector problem
    :math:`LHS v = \lambda RHS v`.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        The list of data matrices

    regs : None | float | 'lw' | 'oas' or list of them, shape (n_views)
        As described in ``mvlearn.mcca.mcca.MCCA``

    as_lists : bool
        If True, returns LHS and RHS as lists of composing blocks instead
        of their composition into full matrices.

    Returns
    -------
    LHS, RHS : numpy.ndarray, (sum_b n_features_b, sum_b n_features_b)
        Left and right hand side matrices for the GEVP
    """
    Xs, n_views, n_samples, n_features = check_Xs(
        Xs, multiview=True, return_dimensions=True
    )
    regs = _check_regs(regs, n_views)

    LHS = [[None] * n_views for _ in range(n_views)]
    RHS = [None] * n_views

    # Off-diagonal blocks: cross products between distinct views.
    for a, b in combinations(range(n_views), 2):
        cross = Xs[a].T @ Xs[b]
        LHS[a][b] = cross
        LHS[b][a] = cross.T

    # Diagonal blocks: per-view products, possibly regularized.
    for b in range(n_views):
        reg, view = regs[b], Xs[b]
        block = None

        if reg is None:
            block = view.T @ view
        elif isinstance(reg, Number):
            block = (1 - reg) * view.T @ view + \
                reg * np.eye(n_features[b])
        elif isinstance(reg, str):
            if reg == "lw":
                block = ledoit_wolf(view)[0]
            elif reg == "oas":
                block = oas(view)[0]
            # The estimators return a proper covariance; bring it back to
            # the scale of X^T X.
            block *= n_samples

        RHS[b] = block
        LHS[b][b] = block

    if as_lists:
        return LHS, RHS
    return np.block(LHS), block_diag(*RHS)
def ledoit_wolf(self):
    """
    Calculate the Ledoit-Wolf shrinkage estimate.

    NaNs in ``self.X`` are replaced via ``np.nan_to_num`` first. The
    shrinkage coefficient is stored in ``self.delta``.

    :return: the result of ``format_and_annualise`` applied to the shrunk
        sample covariance matrix
    """
    cleaned = np.nan_to_num(self.X.values)
    estimate, intensity = covariance.ledoit_wolf(cleaned)
    self.delta = intensity
    return self.format_and_annualise(estimate)
def calculate_covariance(self, t):
    """Return the perturbation-kernel covariance for step ``t``.

    The kernel's covariance is computed from the previous step's theta,
    delta and weights and the current epsilon. If its determinant is below
    1e-15, the Ledoit-Wolf estimate of the previous theta is returned
    instead.
    """
    prev = t - 1
    kernel_cov = self.pert_kernel.covariance(
        t, self.theta[prev], self.delta[prev], self.epsilon[t],
        self.wt[prev])

    if np.linalg.det(kernel_cov) < 1.E-15:
        return ledoit_wolf(self.theta[prev])[0]
    return kernel_cov
def prewhiten(betas, residuals):
    """Prewhiten ``betas`` with the Ledoit-Wolf covariance of ``residuals``.

    A matrix square root of the shrunk covariance is built from its SVD
    and ``betas`` is right-multiplied by the inverse of that root.

    Returns
    -------
    (betas_prewhitened, prewhiten_ok)
        If the inversion raises ``LinAlgError`` the original ``betas`` are
        returned together with ``False``.
    """
    shrunk_cov = ledoit_wolf(residuals)[0]
    left, singular_values, right_h = svd(shrunk_cov, full_matrices=False)
    cov_sqrt = np.dot(left * np.sqrt(singular_values), right_h)

    try:
        whitened = np.dot(betas, np.linalg.inv(cov_sqrt))
    except np.linalg.LinAlgError:
        # Fall back to the untouched input.
        return (betas, False)
    return (whitened, True)
def kernel(self, Pid, t):
    """Return the pdf of a normal kernel centred on particle ``Pid``.

    The covariance is ``self.variance[Pid]`` when ``variance_method`` is 4
    and ``self.variance`` otherwise. If its determinant is below 1e-15
    (possibly singular), the Ledoit-Wolf estimate of ``self.theta[t]`` is
    used instead.

    Returns
    -------
    The bound ``pdf`` method of the resulting multivariate normal.
    """
    cov = self.variance[Pid] if self.variance_method == 4 else self.variance

    if np.linalg.det(cov) < 1.E-15:
        cov = ledoit_wolf(self.theta[t])[0]

    distribution = scipy.stats.multivariate_normal(
        mean=self.theta[t][Pid], cov=cov, allow_singular=True)
    return distribution.pdf
def get_var(self, t, params):
    '''
    Input:
        t: iteration level
        params: parameter vector for all particles from previous iteration
    Returns:
        the result of ``first_iter`` while ``self.start`` is truthy,
        otherwise the particle covariance from the Ledoit-Wolf estimator
    '''
    if not self.start:
        return ledoit_wolf(params)[0]
    return self.first_iter(t, params)
def invcov_mah(mat, cov_est=1):
    """Return an inverse (or pseudo-inverse) covariance matrix.

    Parameters
    ----------
    mat : array-like
        Data to estimate a covariance from, or, when ``cov_est`` is not
        0, 1 or 2, a matrix that is inverted as given.
    cov_est : int
        0 = ``np.cov`` of the transposed data, 1 = Ledoit-Wolf,
        2 = ``diag_covmat``; any other value skips the estimation step.

    Returns
    -------
    numpy.ndarray
        ``np.linalg.inv`` of the matrix, or ``np.linalg.pinv`` when the
        plain inverse fails.
    """
    if cov_est == 0:
        mat = np.cov(np.matrix(mat).T)
    elif cov_est == 1:
        mat = ledoit_wolf(mat)[0]
    elif cov_est == 2:
        mat = diag_covmat(mat)

    try:
        return np.linalg.inv(mat)
    except np.linalg.LinAlgError:
        # Only a failed inversion should trigger the fallback; a bare
        # except would also hide KeyboardInterrupt and programming errors.
        return np.linalg.pinv(mat)
def rpw_lstm(prices,
             initial_weights=None,
             risk_weights=None,
             risk_parity_method='ccd',
             maximum_iterations=100,
             tolerance=1E-8,
             min_assets_number=2,
             max_assets_number=6):
    """Ledoit-Wolf covariance with forecast variances on the diagonal.

    The covariance of the non-missing returns is estimated with
    ``ledoit_wolf``; each diagonal entry is then replaced by the first
    value returned by ``forecast_var_from_lstm`` for that column's prices,
    divided by 100.

    Args:
        prices (DataFrame): Prices for multiple securities.
        The remaining arguments are accepted but not used by this body.

    Returns:
        The adjusted covariance matrix. Previously the matrix was computed
        and then dropped because the function had no return statement.
    """
    r = prices.to_returns().dropna()
    covar = ledoit_wolf(r)[0]
    for i, column in enumerate(prices.columns):
        var, _ = forecast_var_from_lstm(prices[column])
        covar[i, i] = var / 100.0
    return covar
def approximated_max_kelly(data):
    """Find an approximated solution of the portfolio based on the Kelly
    criterion.

    A linear system is built from half the shrunk covariance, bordered by
    a row and column of ones (with a zero corner), and solved against the
    mean returns extended by 1. The last entry of the solution is
    dropped.

    Returns
    -------
    pandas.DataFrame
        One row of weights, one column per column of ``data``.
    """
    returns_data = data.pct_change().dropna()
    # NOTE(review): the shrinkage intensity is estimated on `data` itself,
    # not on `returns_data` -- confirm this is intended.
    delta = ledoit_wolf(data)[1]
    sigma = CovarianceShrinkage(data).shrunk_covariance(delta=delta)
    mu = returns_data.mean(axis=0)

    n_assets = sigma.shape[0]
    system = np.hstack((0.5 * sigma, np.ones((n_assets, 1))))
    system = np.vstack((system, np.ones((1, n_assets + 1))))
    system[-1, -1] = 0.0

    target = np.hstack((mu, [1]))
    solution = np.dot(np.linalg.inv(system), target)
    return pd.DataFrame([solution[:-1]], columns=data.columns)
def _estimate_asset_cov(self, trade_date):
    """Ledoit-Wolf covariance of the index increments, as a stacked Series.

    The increments loaded for ``trade_date`` have rows with missing values
    dropped before estimation. The covariance is labelled with
    ``self.index_ids`` on both axes, stacked, and named ``trade_date``.
    """
    ids = self.index_ids
    increments = self._load_index_inc(trade_date)

    cov_values = ledoit_wolf(increments.dropna(), assume_centered=False)[0]
    cov_frame = pd.DataFrame(cov_values, index=ids, columns=ids)
    return cov_frame.stack().rename(trade_date)
def bhdist(mu1, mu2, mat1, mat2, reg=0):
    """Bhattacharyya distance between two samples, assuming normality.

    Expects columns to be observations and rows to be features.

    Parameters
    ----------
    mu1, mu2 : array-like
        Mean vectors (one entry per feature).
    mat1, mat2 : array-like, shape (n_features, n_observations)
        Data of the two samples.
    reg : int
        1 = Ledoit-Wolf covariance, anything else = ``np.cov``.

    Returns
    -------
    float
        Mean-difference term plus log-determinant term.
    """
    diff = np.asarray(mu1 - mu2).reshape(-1, 1)

    if reg == 1:
        # ledoit_wolf wants samples in rows, so the features-by-observations
        # input has to be transposed to get a features-by-features estimate.
        cov_mat1 = ledoit_wolf(np.asarray(mat1).T)[0]
        cov_mat2 = ledoit_wolf(np.asarray(mat2).T)[0]
    else:
        # np.cov squeezes a single-feature result to 0-d; keep it 2-D so
        # inv/slogdet accept it.
        cov_mat1 = np.atleast_2d(np.cov(mat1))
        cov_mat2 = np.atleast_2d(np.cov(mat2))

    cov_mat_mn = (cov_mat1 + cov_mat2) / 2
    icov_mat_mn = np.linalg.inv(cov_mat_mn)
    term1 = np.dot(np.dot(diff.T, icov_mat_mn), diff) / 8

    # Log-determinants avoid overflow/underflow of the determinants.
    logdet1 = np.linalg.slogdet(cov_mat1)[1]
    logdet2 = np.linalg.slogdet(cov_mat2)[1]
    logdet_mn = np.linalg.slogdet(cov_mat_mn)[1]
    term2 = (logdet_mn / 2) - (logdet1 + logdet2) / 4

    result = term1 + term2
    return result[0, 0]
def invcov_mah(mat, cov_est=1):
    """Regularise and invert a covariance matrix.

    Mostly used for the covariance matrix in the Mahalanobis and
    Bhattacharyya distances (``cov_est`` setting). It attempts to compute
    an inverse and falls back to a pseudo-inverse.

    Parameters
    ----------
    mat : array-like
        Data to estimate a covariance from; used as-is when ``cov_est`` is
        not 0, 1 or 2.
    cov_est : int
        0 = ``np.cov`` of the transposed data, 1 = Ledoit-Wolf,
        2 = ``diag_covmat``.

    Returns
    -------
    numpy.ndarray
        The inverse, or the pseudo-inverse if inversion raises
        ``LinAlgError``.
    """
    if cov_est == 0:
        mat = np.cov(np.matrix(mat).T)
    elif cov_est == 1:
        mat = ledoit_wolf(mat)[0]
    elif cov_est == 2:
        mat = diag_covmat(mat)

    try:
        icov_mat = np.linalg.inv(mat)
    except np.linalg.LinAlgError:
        # Narrowed from a bare except so unrelated errors and interrupts
        # are no longer swallowed.
        icov_mat = np.linalg.pinv(mat)
    return icov_mat
def _initialize(self, X):
    """Initialise mixture parameters from a k-means clustering of ``X``.

    Sets ``_covs`` (one Ledoit-Wolf covariance per component), ``_pi``
    (uniform weights), ``_log_pi`` and ``_mus`` (the k-means centres).
    """
    n_samples = X.shape[0]
    kmeans = KMeans(self._n_components, n_init=self._n_init)
    labels = kmeans.fit(X).predict(X)

    covs = self._covs = []
    for component in range(self._n_components):
        members = np.where(labels == component)[0]
        # Five randomly drawn rows are added to every cluster so that the
        # covariance estimate does not become singular.
        extras = np.random.choice(range(n_samples), size=5)
        rows = np.concatenate([members, extras])
        covs.append(ledoit_wolf(X[rows])[0])

    self._pi = np.ones(self._n_components) / self._n_components
    self._log_pi = np.log(self._pi)
    self._mus = np.array(kmeans.cluster_centers_)
def var_info(mat1, mat2, reg=0):
    """Variation of information between two samples, assuming normality.

    Expects rows to be observations and columns to be features, with the
    same number of observations in both matrices.

    Parameters
    ----------
    mat1, mat2 : array-like, shape (n_observations, n_features_k)
        The two samples.
    reg : int
        1 = Ledoit-Wolf covariance, anything else = sample covariance.

    Returns
    -------
    float
        ``H0 - MI`` with ``MI = H1 + H2 - H0``, where each ``H`` is
        ``0.5 * n * logdet(2*pi*e * cov)`` and ``n`` is the number of
        observations. ``H0`` refers to the column-wise concatenation.
    """
    n = mat1.shape[0]
    mat0 = np.hstack([mat1, mat2])
    samples = (mat0, mat1, mat2)

    if reg == 1:
        covs = [ledoit_wolf(sample)[0] for sample in samples]
    else:
        # rowvar=False matches the rows-are-observations layout (and
        # ledoit_wolf); atleast_2d undoes np.cov's squeeze for one feature.
        covs = [np.atleast_2d(np.cov(sample, rowvar=False))
                for sample in samples]

    scale = 2 * np.exp(1) * np.pi
    # slogdet returns (sign, logabsdet); only the log-determinant can be
    # scaled -- multiplying the returned pair by a float raises TypeError.
    entropy0, entropy1, entropy2 = (
        0.5 * n * np.linalg.slogdet(cov * scale)[1] for cov in covs
    )

    MI = (entropy1 + entropy2 - entropy0)
    return entropy0 - MI
def rpw_future(prices,
               initial_weights=None,
               risk_weights=None,
               risk_parity_method='ccd',
               maximum_iterations=100,
               tolerance=1E-8,
               min_assets_number=2,
               max_assets_number=6):
    """Ledoit-Wolf covariance with forecast variances on the diagonal.

    The covariance of the non-missing returns is estimated with
    ``ledoit_wolf``; each diagonal entry is then overwritten with the
    second value returned by ``future_mean_var`` for that column's price
    values, multiplied by 100. All arguments other than ``prices`` are
    unused.

    Returns:
        The adjusted covariance matrix.
    """
    returns = prices.to_returns().dropna()
    covar, _ = ledoit_wolf(returns)

    for pos in range(len(returns.columns)):
        series = prices[prices.columns[pos]].values
        _, forecast_var = future_mean_var(series)
        covar[pos, pos] = forecast_var * 100
    return covar
def test_ledoit_wolf(self):
    """ModelFrame's ledoit_wolf accessor must agree with scikit-learn."""
    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)

    actual = frame.covariance.ledoit_wolf()
    reference = covariance.ledoit_wolf(iris.data)

    # The accessor returns a (covariance, shrinkage) pair.
    self.assertEqual(len(actual), 2)
    actual_cov, actual_shrinkage = actual[0], actual[1]

    # The covariance is wrapped in a ModelFrame labelled by feature names.
    self.assertIsInstance(actual_cov, pdml.ModelFrame)
    feature_names = frame.data.columns
    tm.assert_index_equal(actual_cov.index, feature_names)
    tm.assert_index_equal(actual_cov.columns, feature_names)

    self.assert_numpy_array_almost_equal(actual_cov.values, reference[0])
    self.assert_numpy_array_almost_equal(actual_shrinkage, reference[1])
def test_ledoit_wolf(self):
    """ModelFrame's ledoit_wolf accessor must agree with scikit-learn."""
    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)

    actual = frame.covariance.ledoit_wolf()
    reference = covariance.ledoit_wolf(iris.data)

    # The accessor returns a (covariance, shrinkage) pair.
    self.assertEqual(len(actual), 2)
    actual_cov, actual_shrinkage = actual[0], actual[1]

    # The covariance is wrapped in a ModelFrame labelled by feature names.
    self.assertTrue(isinstance(actual_cov, pdml.ModelFrame))
    feature_names = frame.data.columns
    self.assert_index_equal(actual_cov.index, feature_names)
    self.assert_index_equal(actual_cov.columns, feature_names)

    self.assert_numpy_array_almost_equal(actual_cov.values, reference[0])
    self.assert_numpy_array_almost_equal(actual_shrinkage, reference[1])
def get_covariance_estimator(estimator):
    """Resolve ``estimator`` to a covariance-estimation callable.

    Parameters
    ----------
    estimator : callable or str
        A callable is returned unchanged. A string selects an estimator:
        "MCD"/"mcd"/"MinCovDet"/"fast_mcd", "Ledoit-Wolf"/"LW"/"lw" or
        "OAS"/"oas". String subclasses such as ``numpy.str_`` are accepted.

    Returns
    -------
    callable
        Takes the data as its single argument. Unknown names and
        unsupported types fall back to ``empirical_covariance``.
    """
    if callable(estimator):
        return estimator

    # isinstance (not type(...) == str) so that str subclasses are not
    # silently routed to the empirical fallback.
    if isinstance(estimator, str):
        if estimator in ("MCD", "mcd", "MinCovDet", "fast_mcd"):
            # NOTE(review): fast_mcd is returned unwrapped while the LW/OAS
            # branches keep only element [0] of their result -- confirm the
            # caller expects fast_mcd's full return value.
            return fast_mcd
        if estimator in ("Ledoit-Wolf", "LW", "lw"):
            return lambda x: ledoit_wolf(x)[0]
        if estimator in ("OAS", "oas"):
            return lambda x: oas(x)[0]

    return empirical_covariance
def correlation(X):
    """Compute a correlation matrix via Ledoit-Wolf shrinkage.

    Each column of ``X`` is centred and divided by its standard deviation;
    the Ledoit-Wolf covariance of the result is returned. The caller's
    array is not modified.
    """
    standardized = X - X.mean(axis=0)
    standardized /= standardized.std(axis=0)
    # To have robust correlations, use MCD (minimum covariance determinant)
    # instead.
    return covariance.ledoit_wolf(standardized)[0]
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    The checks are run twice: first on explicitly centred data with
    ``assume_centered=True``, then on the raw data with the default
    settings. ``X`` and ``n_samples`` come from module scope.
    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    # Reference values reused by the later comparisons.
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    # A different block size must give the same shrinkage.
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # With one feature the estimate is compared with the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # Warnings are recorded (and discarded); nothing is asserted here.
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    with warnings.catch_warnings(record=True):
        lw.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
# NOTE(review): this fragment starts mid-script; `prec`, `d`, `cov`, `prng`,
# `n_features` and `n_samples` are defined before the visible part.
# Rescale the precision matrix by `d` along both axes.
prec *= d
prec *= d[:, np.newaxis]
# Draw zero-mean samples with covariance `cov`, then standardize each column.
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

##############################################################################
# Estimate the covariance
emp_cov = np.dot(X.T, X) / n_samples

model = GraphLassoCV()
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

# Ledoit-Wolf estimate and its inverse, for comparison.
lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

##############################################################################
# Plot the results
pl.figure(figsize=(10, 6))
pl.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
        ('GraphLasso', cov_), ('True', cov)]
# One symmetric colour range, taken from the GraphLasso estimate, is shared
# by all panels.
vmax = cov_.max()
for i, (name, this_cov) in enumerate(covs):
    pl.subplot(2, 4, i + 1)
    pl.imshow(this_cov, interpolation='nearest', vmin=-vmax, vmax=vmax,
              cmap=pl.cm.RdBu_r)
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # The checks are run twice: first on explicitly centred data with
    # assume_centered=True, then on the raw data with default settings.
    # `X` and `n_samples` come from module scope.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    # Reference values reused by the later comparisons.
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    # A different block size must give the same shrinkage.
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # With one feature the estimate is compared with the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    # The fitted covariance is expected to be all zeros in that case.
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
def benchmark3(): """Compare group_sparse_covariance result for different initializations. """ ## parameters = {'n_tasks': 10, 'n_var': 50, 'density': 0.15, ## 'alpha': .001, 'tol': 1e-2, 'max_iter': 100} parameters = {'n_var': 40, 'n_tasks': 10, 'density': 0.15, 'alpha': .01, 'tol': 1e-3, 'max_iter': 100} mem = joblib.Memory(".") _, _, gt = create_signals(parameters, output_dir="_prof_group_sparse_covariance") signals = gt["signals"] emp_covs, n_samples = empirical_covariances(signals) print("alpha max: " + str(compute_alpha_max(emp_covs, n_samples))) # With diagonal elements initialization probe1 = ScoreProbe() est_precs1, probe1 = mem.cache(modified_gsc)(signals, parameters, probe1) probe1.comment = "diagonal" # set after execution for joblib not to see it probe1.plot() # With Ledoit-Wolf initialization ld = np.empty(emp_covs.shape) for k in range(emp_covs.shape[-1]): ld[..., k] = np.linalg.inv(ledoit_wolf(signals[k])[0]) probe1 = ScoreProbe() est_precs1, probe1 = utils.timeit(mem.cache(modified_gsc))( signals, parameters, probe=probe1) probe1.comment = "diagonal" # for joblib to ignore this value probe2 = ScoreProbe() parameters["precisions_init"] = ld est_precs2, probe2 = utils.timeit(mem.cache(modified_gsc))( signals, parameters, probe=probe2) probe2.comment = "ledoit-wolf" print("difference between final estimates (max norm) %.2e" % abs(est_precs1 - est_precs2).max()) pl.figure() pl.semilogy(probe1.timings[1:], probe1.max_norm, "+-", label=probe1.comment) pl.semilogy(probe2.timings[1:], probe2.max_norm, "+-", label=probe2.comment) pl.xlabel("Time [s]") pl.ylabel("Max norm") pl.grid() pl.legend(loc="best") pl.figure() pl.plot(probe1.timings, probe1.objective, "+-", label=probe1.comment) pl.plot(probe2.timings, probe2.objective, "+-", label=probe2.comment) pl.xlabel("Time [s]") pl.ylabel("objective") pl.grid() pl.legend(loc="best") pl.show()
def _lwf(X):
    """Wrapper for the sklearn Ledoit-Wolf covariance estimator.

    ``X`` is transposed before it is handed to ``ledoit_wolf``; only the
    covariance (first element of the result) is returned.
    """
    return ledoit_wolf(X.T)[0]
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    The checks are run twice: first with ``assume_centered=True`` passed to
    ``fit``/``score``, then with the defaults. Expected shrinkage and score
    values are hard-coded. ``X`` and ``n_samples`` come from module scope.
    """
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d, assume_centered=True)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # With one feature the estimate is compared with the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)