def _gmm_from_memberships(data, memberships, covariance_type):
    clusters = set(memberships)
    n_clusters = len(clusters)
    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters]) / n_clusters
    gmm.means_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'diag':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'spherical':
        gmm.covars_ = np.zeros([n_clusters])
    if covariance_type == 'full':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1], data.shape[1]])
    for cluster in clusters:
        cluster = int(cluster)
        indices = (memberships == cluster)
        gmm.means_[cluster, :] = data[indices, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            # TODO Fix covariance calculation; for now, return cov=1
            # D = np.diag(np.cov(data[indices, :].T))
            D = np.ones([data.shape[1]])
            if covariance_type == 'spherical':
                gmm.covars_[cluster] = D.mean()
            else:
                gmm.covars_[cluster] = D
        if covariance_type == 'full':
            cov_estimator = OAS()
            cov_estimator.fit(data[indices, :])
            gmm.covars_[cluster] = cov_estimator.covariance_
    return gmm
def fit_base(self, X, y):
    """
    Fit the SLDA model to the base data.
    :param X: an Nxd torch tensor of base initialization data
    :param y: an Nx1-dimensional torch tensor of the associated labels for X
    :return: None
    """
    print('\nFitting Base...')
    X = X.to(self.device)
    y = y.squeeze()

    # update positive and negative means
    cls_ix = torch.arange(self.num_classes)
    for k in torch.unique(y):
        self.posW[k] = X[y == k].mean(0)
        self.posT[k] = X[y == k].shape[0]
    for k in cls_ix:
        self.negW[k] = X[y != k].mean(0)
        self.negT[k] = X[y != k].shape[0]
    self.num_updates = X.shape[0]

    print('\nEstimating initial covariance matrix...')
    from sklearn.covariance import OAS
    cov_estimator = OAS(assume_centered=True)
    cov_estimator.fit((X - self.posW[y]).cpu().numpy())
    self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
        self.device)

    print('\nBuilding initial OOD threshold(s)...')
    self.ood_predict(X, y)
    print('')
def OAS_est(X):
    '''
    OAS coefficient estimate
    X_size = (n_samples, n_features)
    '''
    oa = OAS()
    cov_oa = oa.fit(X).covariance_
    return cov_oa
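# A minimal usage sketch for OAS_est above, on synthetic data (the shapes
# and seed are illustrative only); assumes OAS is imported from
# sklearn.covariance as in the surrounding snippets.
import numpy as np
from sklearn.covariance import OAS

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))      # 50 samples, 10 features
cov_oa = OAS_est(X)                # shrunk covariance estimate
assert cov_oa.shape == (10, 10)    # one row/column per feature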
def _shrink_covariance(asset_returns):
    """
    Regularise/Shrink the asset covariances.

    :param asset_returns: (pd.DataFrame) Asset returns
    :return: (np.array) Shrunk covariance matrix of the asset returns
    """
    oas = OAS()
    oas.fit(asset_returns)
    shrinked_covariance = oas.covariance_
    return shrinked_covariance
def _shrink_covariance(covariance):
    """
    Regularise/Shrink the asset covariances.

    :param covariance: (pd.DataFrame) Asset returns covariances
    :return: (pd.DataFrame) Shrunk asset returns covariances
    """
    oas = OAS()
    oas.fit(covariance)
    shrinked_covariance = oas.covariance_
    return pd.DataFrame(shrinked_covariance, index=covariance.columns,
                        columns=covariance.columns)
def shrinked_covariance(returns, price_data=False, shrinkage_type='basic',
                        assume_centered=False, basic_shrinkage=0.1):
    """
    Calculates the covariance estimator with shrinkage for a dataframe of
    asset prices or returns.

    This function allows three types of shrinkage - Basic, Ledoit-Wolf and
    Oracle Approximating Shrinkage. It is a wrapper of sklearn's
    ShrunkCovariance, LedoitWolf and OAS classes.

    According to the scikit-learn User Guide on Covariance estimation:
    "Sometimes, it even occurs that the empirical covariance matrix cannot
    be inverted for numerical reasons. To avoid such an inversion problem,
    a transformation of the empirical covariance matrix has been introduced:
    the shrinkage. Mathematically, this shrinkage consists in reducing the
    ratio between the smallest and the largest eigenvalues of the empirical
    covariance matrix".

    Link to the documentation:
    https://scikit-learn.org/stable/modules/covariance.html

    If a dataframe of prices is given, it is transformed into a dataframe
    of returns using the calculate_returns method from the
    ReturnsEstimators class.

    :param returns: (pd.DataFrame) Dataframe where each column is a series
        of returns or prices for an asset.
    :param price_data: (bool) Flag if prices of assets are used and not
        returns. (False by default)
    :param shrinkage_type: (str) Type of shrinkage to use. (``basic`` by
        default, ``lw``, ``oas``, ``all``)
    :param assume_centered: (bool) Flag for data with mean almost, but not
        exactly zero. (Read documentation for chosen shrinkage class,
        False by default)
    :param basic_shrinkage: (float) Between 0 and 1. Coefficient in the
        convex combination for basic shrinkage. (0.1 by default)
    :return: (np.array) Estimated covariance matrix. Tuple of covariance
        matrices if shrinkage_type = ``all``.
    """
    # Calculating the series of returns from series of prices
    if price_data:
        # Class with returns calculation function
        ret_est = ReturnsEstimators()

        # Calculating returns
        returns = ret_est.calculate_returns(returns)

    # Calculating the covariance matrix for the chosen method
    if shrinkage_type == 'basic':
        cov_matrix = ShrunkCovariance(assume_centered=assume_centered,
                                      shrinkage=basic_shrinkage).fit(
            returns).covariance_
    elif shrinkage_type == 'lw':
        cov_matrix = LedoitWolf(assume_centered=assume_centered).fit(
            returns).covariance_
    elif shrinkage_type == 'oas':
        cov_matrix = OAS(assume_centered=assume_centered).fit(
            returns).covariance_
    else:
        cov_matrix = (
            ShrunkCovariance(assume_centered=assume_centered,
                             shrinkage=basic_shrinkage).fit(returns).covariance_,
            LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_,
            OAS(assume_centered=assume_centered).fit(returns).covariance_)

    return cov_matrix
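# A minimal usage sketch for shrinked_covariance above; the return series is
# synthetic and illustrative only. Passing returns directly
# (price_data=False) avoids the ReturnsEstimators dependency mentioned in
# the docstring; assumes ShrunkCovariance, LedoitWolf and OAS are imported
# from sklearn.covariance as in the function's own module.
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
rets = pd.DataFrame(rng.normal(scale=0.01, size=(250, 3)),
                    columns=['A', 'B', 'C'])
cov_oas = shrinked_covariance(rets, shrinkage_type='oas')
cov_all = shrinked_covariance(rets, shrinkage_type='all')
print(cov_oas.shape)   # (3, 3)
print(len(cov_all))    # 3 estimates: basic, Ledoit-Wolf, OAS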
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas """
    ts = do_mask_img(masker, func, confound)

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        # plain (partial) correlation: fill a Bunch exposing the same
        # attributes as the covariance estimators
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def compute(self, context, data, stocks):
    prices = data.history(stocks, "price", 200, "1d")
    prices = prices.dropna(axis=1)
    returns = prices.pct_change().dropna().values
    returns = returns * 1000.

    cov = OAS().fit(returns).covariance_
    e, v = np.linalg.eig(cov)
    idx = e.argsort()
    comp = v[:, idx[-15:]]
    if comp[0, 0] < 0:
        comp *= -1
    sources = np.dot(returns, comp)

    betas = np.zeros((np.shape(returns)[1], np.shape(sources)[1]))
    for i in range(0, np.shape(returns)[1]):
        model = LassoCV().fit(sources, returns[:, i])
        betas[i, :] = model.coef_

    W = getweights(betas, cov, np.asarray([1.] * np.shape(returns)[1]))
    self.prices = prices.values[0, :]
    pvalue = np.dot(prices.values, W / self.prices)
    self.mean = np.mean(pvalue)
    self.std = np.std(pvalue)
    self.signal = (pvalue[-1] - self.mean) / self.std
    abspval = np.sum(np.abs(W))
    if abs(self.signal) < .5:
        return False, None, None, None
    return True, W, abspval, prices
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        # plain (partial) correlation: fill a Bunch exposing the same
        # attributes as the covariance estimators
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def lw_covars(returns):
    """
    Calculates a constrained covariance matrix between the returns.

    :return: A pandas dataframe of the covariance between the returns
    """
    co_vars = returns.cov() * WEEKDAYS_PER_YEAR

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Calcing covars as table: {}".format(
            returns.to_dict('list')))

    # Shrink the covars (Ledoit and Wolf)
    sk = OAS(assume_centered=True)
    sk.fit(returns.values)
    return (1 - sk.shrinkage_) * co_vars + sk.shrinkage_ * np.trace(
        co_vars) / len(co_vars) * np.identity(len(co_vars))
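# A minimal sketch of the shrinkage blend used in lw_covars above: the
# annualised sample covariance S is pulled toward the scaled identity
# (tr(S) / p) * I with the OAS shrinkage weight delta. The data and the
# annualisation factor 252 are illustrative stand-ins (the function itself
# uses its module's WEEKDAYS_PER_YEAR and logger).
import numpy as np
from sklearn.covariance import OAS

rng = np.random.default_rng(2)
R = rng.normal(scale=0.01, size=(260, 4))        # toy daily return panel
S = np.cov(R, rowvar=False) * 252                # annualised sample covariance
delta = OAS(assume_centered=True).fit(R).shrinkage_
target = np.trace(S) / S.shape[0] * np.identity(S.shape[0])
shrunk = (1 - delta) * S + delta * target        # same convex combination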
def _get_omega(self, returns):
    """
    Get robust covariance matrix for use in Newton solver.

    Parameters
    ----------
    returns : numpy array of return data

    Returns
    -------
    omega : array of shape n x n, where n is equal to the number of
        securities involved
    """
    corr_returns = returns[-self.corr_window:, :]
    cov_returns = returns[-self.cov_window:, :]
    if self.cov_est == 'oas':
        omega = OAS().fit(corr_returns).covariance_ * 10**4
    elif self.cov_est == 'empirical':
        omega = EmpiricalCovariance().fit(corr_returns).covariance_ * 10**4
    else:
        corr = np.corrcoef(corr_returns, rowvar=False)
        cov_diag = np.diag(np.sqrt(np.var(cov_returns, axis=0)))
        omega = cov_diag @ corr @ cov_diag
        if self.lw_shrink is None:
            lw = ledoit_wolf(corr_returns)[1]
            omega = shrunk_covariance(omega, shrinkage=lw) * 10**4
        else:
            omega = shrunk_covariance(omega, shrinkage=self.lw_shrink) * 10**4
    return omega
def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """
    Simulate log-normal data matching the mean and covariance of ``data``.

    :param data: Input data matrix (n_samples x n_features)
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default): regular empirical covariance
        - Diagonal: diagonal covariance (no between-variable correlation)
        - ShrinkageLedoitWolf: Ledoit-Wolf shrinkage estimator
        - ShrinkageOAS: Oracle Approximating Shrinkage estimator
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and empirical correlation estimate
    """
    try:
        # Offset data to make sure there are no 0 values for log transform
        offset = np.min(data) + 1
        offdata = data + offset

        # log on the offsetted data
        logdata = np.log(offdata)
        # Get the means
        meanslog = np.mean(logdata, axis=0)

        # Specify covariance
        # Regular covariance estimator
        if covtype == "Estimate":
            covlog = np.cov(logdata, rowvar=0)
        # Shrinkage covariance estimator, using LedoitWolf
        elif covtype == "ShrinkageLedoitWolf":
            scov = LedoitWolf()
            scov.fit(logdata)
            covlog = scov.covariance_
        elif covtype == "ShrinkageOAS":
            scov = OAS()
            scov.fit(logdata)
            covlog = scov.covariance_
        # Diagonal covariance matrix (no between variable correlation)
        elif covtype == "Diagonal":
            # get variance of log data by each column
            covlogdata = np.var(logdata, axis=0)
            # generate a matrix with diagonal of variance of log data
            covlog = np.diag(covlogdata)
        else:
            raise ValueError('Unknown Covariance type')

        simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
        simData = np.exp(simData)
        simData -= offset

        # Set to 0 negative values
        simData[np.where(simData < 0)] = 0

        # work out the correlation matrix by columns,
        # each column is a variable
        corrMatrix = np.corrcoef(simData, rowvar=0)

        return simData, corrMatrix

    except Exception as exp:
        raise exp
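# A minimal usage sketch for simulateLogNormal above, on synthetic positive
# data (shapes and seed are illustrative only); assumes LedoitWolf and OAS
# are imported from sklearn.covariance as in the function's own module.
import numpy as np

rng = np.random.default_rng(3)
data = np.exp(rng.normal(size=(100, 5)))                 # positive toy data
sim, corr = simulateLogNormal(data, covtype='ShrinkageOAS', nsamples=500)
print(sim.shape)    # (500, 5)
print(corr.shape)   # (5, 5) correlation of the simulated columns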
def _get_coef(self, code_list):
    # Provide the parameters that _calc_weights needs to compute
    w.start()
    return_value = np.array(
        w.wsd(code_list, "pct_chg", "ED-" + str(self.N - 1) + "TD",
              self.date, "").Data)
    from sklearn.covariance import OAS
    return_cov = OAS().fit(return_value.transpose())
    return_cov = return_cov.covariance_
    return return_cov
def plot_lda():
    n_train = 20         # samples for training
    n_test = 200         # samples for testing
    n_averages = 50      # how often to repeat classification
    n_features_max = 75  # maximum number of features
    step = 4             # step size for the calculation

    acc_clf1, acc_clf2, acc_clf3 = [], [], []
    n_features_range = range(1, n_features_max + 1, step)
    for n_features in n_features_range:
        score_clf1, score_clf2, score_clf3 = 0, 0, 0
        for _ in range(n_averages):
            X, y = generate_data(n_train, n_features)

            clf1 = LinearDiscriminantAnalysis(solver='lsqr',
                                              shrinkage='auto').fit(X, y)
            clf2 = LinearDiscriminantAnalysis(solver='lsqr',
                                              shrinkage=None).fit(X, y)
            oa = OAS(store_precision=False, assume_centered=False)
            clf3 = LinearDiscriminantAnalysis(solver='lsqr',
                                              covariance_estimator=oa).fit(
                X, y)

            X, y = generate_data(n_test, n_features)
            score_clf1 += clf1.score(X, y)
            score_clf2 += clf2.score(X, y)
            score_clf3 += clf3.score(X, y)

        acc_clf1.append(score_clf1 / n_averages)
        acc_clf2.append(score_clf2 / n_averages)
        acc_clf3.append(score_clf3 / n_averages)

    features_samples_ratio = np.array(n_features_range) / n_train

    plt.plot(features_samples_ratio, acc_clf1, linewidth=2,
             label="Linear Discriminant Analysis with Ledoit Wolf",
             color='navy')
    plt.plot(features_samples_ratio, acc_clf2, linewidth=2,
             label="Linear Discriminant Analysis", color='gold')
    plt.plot(features_samples_ratio, acc_clf3, linewidth=2,
             label="Linear Discriminant Analysis with OAS", color='red')

    plt.xlabel('n_features / n_samples')
    plt.ylabel('Classification accuracy')

    plt.legend(loc=3, prop={'size': 12})
    plt.suptitle('Linear Discriminant Analysis vs. ' + '\n' +
                 'Shrinkage Linear Discriminant Analysis vs. ' + '\n' +
                 'OAS Linear Discriminant Analysis (1 discriminative feature)')
    plt.show()
def _get_cov(self):
    w.start()
    return_value = np.array(
        w.wsd(self.code_list, "pct_chg",
              "ED-" + str(self.N_days - 1) + "TD", self.date, "").Data)
    from sklearn.covariance import OAS
    return_cov = OAS().fit(return_value.transpose())
    return_cov = return_cov.covariance_
    return return_cov
def fit_base(self, X, y):
    """
    Fit the SLDA model to the base data.
    :param X: an Nxd torch tensor of base initialization data
    :param y: an Nx1-dimensional torch tensor of the associated labels for X
    :return: None
    """
    print('\nFitting Base...')

    # update class means
    for k in torch.unique(y):
        self.muK[k] = X[y == k].mean(0)
        self.cK[k] = X[y == k].shape[0]
    self.num_updates = X.shape[0]

    print('\nEstimating initial covariance matrix...')
    from sklearn.covariance import OAS
    cov_estimator = OAS(assume_centered=True)
    cov_estimator.fit((X - self.muK[y]).cpu().numpy())
    self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
        self.device)
def get_cov_estimator(cov_type):
    if cov_type == 'LW':
        model = LedoitWolf()
    elif cov_type == 'OAS':
        model = OAS()
    elif cov_type == 'MCD':
        model = MinCovDet()
    elif cov_type[:2] == 'SC':
        shrinkage = float(cov_type.split('_')[1])
        model = ShrunkCovariance(shrinkage=shrinkage)
    elif cov_type[:2] == 'GL':
        alpha = float(cov_type.split('_')[1])
        model = GraphicalLasso(alpha=alpha)
    else:
        raise ValueError('Unknown cov_type: {!r}'.format(cov_type))
    return model
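# A minimal usage sketch for get_cov_estimator above; the suffix-encoded
# types ('SC_0.2', 'GL_0.1') follow the parsing convention in the function,
# and the sklearn.covariance estimators it references are assumed imported
# in its module. Data below is synthetic and illustrative only.
import numpy as np

rng = np.random.default_rng(4)
X = rng.normal(size=(200, 6))
for cov_type in ['LW', 'OAS', 'SC_0.2']:
    est = get_cov_estimator(cov_type)
    est.fit(X)
    print(cov_type, est.covariance_.shape)   # each prints (6, 6)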
def correlations(df, categorical_portions):
    # The more samples, the slower, but the more accurate the categorical
    # correlation
    NUM_CATEGORICAL_SAMPLES = 5
    for i in range(NUM_CATEGORICAL_SAMPLES):
        df = df.append(df, ignore_index=True)

    categorical_cols = list(categorical_portions.keys())

    # First generate continuous samples for categorical values. We do this
    # by sampling from a truncated normal distribution in the range for
    # that continuous variable.
    for categorical_col in categorical_cols:
        portions = categorical_portions[categorical_col]
        # The values of the categorical variable in order
        portions_keys = [val for val, frac in portions]

        for i, cat_val in enumerate(df[categorical_col]):
            if len(portions) == 1:
                # Normal sample
                df.loc[i, categorical_col] = norm.rvs()
                continue
            ind = portions_keys.index(cat_val)

            # Get sums of prev portions including and not including
            # this portion
            sum_a = sum(map(lambda i: portions[i][1], range(ind)))
            sum_b = sum_a + portions[ind][1]

            # Get thresholds
            threshold_a = norm.ppf(sum_a, loc=0.0, scale=1.0)
            threshold_b = norm.ppf(sum_b, loc=0.0, scale=1.0)

            # Sample truncated norm
            df.loc[i, categorical_col] = truncnorm.rvs(threshold_a,
                                                       threshold_b)

    # estimate covariance matrix
    estimator = OAS()
    estimator.fit(df.values)
    cov = pd.DataFrame(estimator.covariance_, index=df.columns,
                       columns=df.columns)
    return cov
def get_stages(self):
    from sklearn.covariance import (EmpiricalCovariance, EllipticEnvelope,
                                    LedoitWolf, MinCovDet, OAS,
                                    ShrunkCovariance)
    from sklearn.preprocessing import (StandardScaler, RobustScaler,
                                       MinMaxScaler)
    from srom.anomaly_detection.generalized_anomaly_model import \
        GeneralizedAnomalyModel
    from srom.utils.no_op import NoOp

    return [
        [('skipscaling', NoOp()),
         ('standardscaler', StandardScaler()),
         ('robustscaler', RobustScaler()),
         ('minmaxscaling', MinMaxScaler())],
        [
            # Covariance Structure based Anomaly Models
            ('empiricalcovariance',
             GeneralizedAnomalyModel(base_learner=EmpiricalCovariance(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
            ('ellipticenvelope',
             GeneralizedAnomalyModel(base_learner=EllipticEnvelope(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
            ('ledoitwolf',
             GeneralizedAnomalyModel(base_learner=LedoitWolf(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
            ('mincovdet',
             GeneralizedAnomalyModel(base_learner=MinCovDet(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
            ('oas',
             GeneralizedAnomalyModel(base_learner=OAS(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
            ('shrunkcovariance',
             GeneralizedAnomalyModel(base_learner=ShrunkCovariance(),
                                     fit_function='fit',
                                     predict_function='mahalanobis',
                                     score_sign=1)),
        ]
    ]
def __init__(self, template, shrinkage='oas', center=True, cov_i=None):
    self.template = np.asarray(template).ravel()[:, np.newaxis]
    self.center = center
    if center:
        self.template -= self.template.mean()

    if shrinkage == 'oas':
        self.cov = OAS()
    elif shrinkage == 'lw':
        self.cov = LedoitWolf()
    elif shrinkage == 'none':
        self.cov = EmpiricalCovariance()
    elif type(shrinkage) == float or type(shrinkage) == int:
        self.cov = ShrunkCovariance(shrinkage=shrinkage)
    else:
        raise ValueError('Invalid value for shrinkage parameter.')
    self.cov_i = cov_i
def fit(self, data, dataset):
    """
    data: array, data to fit cov on
    dataset: string, nametag of the data (e.g. 'mnist')
    """
    self.dataset = dataset
    self.mean = np.expand_dims(data.mean(axis=0), 0)
    if self.mode == 'ML':
        self.cov = EmpiricalCovariance().fit(data).covariance_
    elif self.mode == 'OAS':
        self.cov = OAS().fit(data).covariance_
    elif self.mode == 'LW':
        self.cov = LedoitWolf().fit(data).covariance_
    elif self.mode == 'NERCOME':
        self.cov = self.nercome_estimator(data)
    else:
        raise ValueError
    return True
def compute_connectivity_voxel(roi, voxel, conn):
    """ Returns connectivity of one voxel for a given roi """
    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    ts = np.array([roi, voxel]).T
    if conn == 'corr' or conn == 'pcorr':
        cov = np.corrcoef(ts)[0, 1]
    else:
        fc.fit(ts)
        cov = fc.covariance_[0, 0]
    return cov
def covariance_estimator(matrix, method='ledoit-wolf', assume_centered=True,
                         store_precision=True, **kwargs):
    """
    Return a pre-fit estimator for covariance from one of the scikit-learn
    estimators.

    :param matrix: matrix to fit covariance to
    :param method: one of `SUPPORTED_SKLEARN_COVARIANCE_ESTIMATORS`
    :param assume_centered: whether to assume data to be centered
    :param store_precision: if true, computes the precision matrix
        (i.e. the inverse covariance) too
    :param kwargs: other kwargs to pass to estimator
    :return: the fitted estimator
    """
    if method == 'ledoit-wolf':
        estimator = LedoitWolf(assume_centered=assume_centered,
                               store_precision=store_precision, **kwargs)
    elif method == 'oas':
        estimator = OAS(assume_centered=assume_centered,
                        store_precision=store_precision, **kwargs)
    elif method == 'mincovdet':
        estimator = MinCovDet(assume_centered=assume_centered,
                              store_precision=store_precision, **kwargs)
    elif method == 'empirical':
        estimator = EmpiricalCovariance(assume_centered=assume_centered,
                                        store_precision=store_precision,
                                        **kwargs)
    else:
        raise Exception('Unsupported estimator {!r}'.format(method))
    estimator.fit(matrix.T)
    return estimator
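# A minimal usage sketch for covariance_estimator above. Note that the
# function fits on matrix.T, so rows of `matrix` are treated as features and
# columns as samples; the toy array is illustrative only, and the
# sklearn.covariance estimators are assumed imported in the function's module.
import numpy as np

rng = np.random.default_rng(5)
matrix = rng.normal(size=(8, 300))   # 8 features x 300 samples
est = covariance_estimator(matrix, method='oas')
print(est.covariance_.shape)         # (8, 8)
print(est.precision_.shape)          # (8, 8), since store_precision=True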
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2, 1, 1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
pl.ylabel("Squared error")
pl.legend(loc="upper right")
pl.title("Comparison of covariance estimators")
pl.xlim(5, 31)
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert oa.precision_ is None

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert oa.precision_ is None
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert oa.precision_ is None

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert oa.precision_ is None
def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr


if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  # .fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        # for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas,
        #                        cov_mcd]):
        ax = fig.add_subplot(2, 2, i + 1)
print(timecourse_files)

# roll through the subjects
print(np.shape(timecourse_data)[0])
for i in range(np.shape(timecourse_data)[0]):
    # for i in range(10):
    print(i)

    # extract the timecourses for this subject
    subject_timecourses = timecourse_data[i, :, :]
    # print(np.shape(subject_timecourses))

    # calculate Pearson covariance
    X = scale(subject_timecourses, axis=1)
    cov = np.dot(X, np.transpose(X)) / np.shape(X)[1]
    print(cov[:5, :5])
    print(logm(cov)[:5, :5])

    # calculate sparse inverse covariance (precision) matrix
    model = OAS(store_precision=False, assume_centered=True)
    model.fit(np.transpose(X))
    cov = model.covariance_
    OAS_matrices[i, :] = np.reshape(cov, (1, 8100))
    # print(cov[:5, :5])
    foo = logm(cov)
    # print(logm(cov[:5, :5]))

# save the data
np.savetxt('/home/jonyoung/IoP_data/Data/connectivity_data/OAS_data.csv',
           OAS_matrices, delimiter=',')
    # add non-discriminative features
    if n_features > 1:
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y


acc_clf1, acc_clf2, acc_clf3 = [], [], []
n_features_range = range(1, n_features_max + 1, step)
for n_features in n_features_range:
    score_clf1, score_clf2, score_clf3 = 0, 0, 0
    for _ in range(n_averages):
        X, y = generate_data(n_train, n_features)

        clf1 = LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto").fit(X, y)
        clf2 = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=None).fit(X, y)
        oa = OAS(store_precision=False, assume_centered=False)
        clf3 = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=oa).fit(
            X, y
        )

        X, y = generate_data(n_test, n_features)
        score_clf1 += clf1.score(X, y)
        score_clf2 += clf2.score(X, y)
        score_clf3 += clf3.score(X, y)

    acc_clf1.append(score_clf1 / n_averages)
    acc_clf2.append(score_clf2 / n_averages)
    acc_clf3.append(score_clf3 / n_averages)

features_samples_ratio = np.array(n_features_range) / n_train
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert oa.precision_ is None

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert oa.precision_ is None
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
    log_likelihood, empirical_covariance

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(
    X_train, assume_centered=True).score(X_test, assume_centered=True)
    for s in shrinkages]

# getting the likelihood under the real model
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

###############################################################################
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert oa.precision_ is None

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert oa.precision_ is None
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')

# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")

plt.plot(plt.xlim(), 2 * [loglik_real], '--r',
         label="Real covariance likelihood")

# adjust view
args = parser.parse_args()

if args.verbose:
    print('sys.argv:')
    print(sys.argv)
    print()
    print('numpy version:', np.__version__)
    print('pandas version:', pd.__version__)
    print('scipy version:', sp.__version__)
    print()

gene_expr_raw = pd.read_table(args.data)
gene_expr = gene_expr_raw.T
X_centered = (gene_expr - gene_expr.mean()) / np.sqrt(gene_expr.var())

oa = OAS(store_precision=True, assume_centered=True)
gene_expr_OAS_corr = oa.fit(X_centered)

n_genes = gene_expr_OAS_corr.covariance_.shape[1]

g = Graph(directed=False)
g.add_vertex(n=n_genes)
spearman = g.new_ep("double", 0)
pval = g.new_ep("double", 0)
genes = g.new_vertex_property(
    "string", np.array(np.array(gene_expr.columns, dtype="str")))
g.vertex_properties["genes"] = genes

for i in range(n_genes):
    for j in range(i):
        spearman_r = sp.stats.spearmanr(X_centered.iloc[:, i],
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(
    n_samples_range,
    lw_mse.mean(1),
    yerr=lw_mse.std(1),
    label="Ledoit-Wolf",
    color="navy",
    lw=2,
)
plt.errorbar(
def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False): """Set optimal shrinkage amount according to chosen method. /!\ Could be rewritten with GridSearchCV. Parameters ---------- X: array-like, shape = [n_samples, n_features] Training data, where n_samples is the number of samples and n_features is the number of features. method: float or str in {"cv", "lw", "oas"}, The method used to set the shrinkage. If a floating value is provided that value is used. Otherwise, the selection is made according to the selected method. "cv" (default): 10-fold cross-validation. (or Leave-One Out cross-validation if n_samples < 10) "lw": Ledoit-Wolf criterion "oas": OAS criterion verbose: bool, Verbose mode or not. Returns ------- optimal_shrinkage: float, The optimal amount of shrinkage. """ n_samples, n_features = X.shape if isinstance(method, str): std_shrinkage = np.trace(empirical_covariance(X)) / \ (n_features * n_samples) self.std_shrinkage = std_shrinkage if method == "cv": from sklearn.covariance import log_likelihood n_samples, n_features = X.shape shrinkage_range = np.concatenate( ([0.], 10.**np.arange(-n_samples / n_features, -1, 0.5), np.arange(0.05, 1., 0.05), np.arange(1., 20., 1.), np.arange(20., 100, 5.), 10.**np.arange(2, 7, 0.5))) # get a "pure" active set with a standard shrinkage active_set_estimator = RMCDl2(shrinkage=std_shrinkage) active_set_estimator.fit(X) active_set = np.where(active_set_estimator.support_)[0] # split this active set in ten parts active_set = active_set[np.random.permutation(active_set.size)] if active_set.size >= 10: # ten fold cross-validation n_folds = 10 fold_size = active_set.size / 10 else: n_folds = active_set.size fold_size = 1 log_likelihoods = np.zeros((shrinkage_range.size, n_folds)) if verbose: print "*** Cross-validation" for trial in range(n_folds): if verbose: print trial / float(n_folds) # define train and test sets train_set_indices = np.concatenate( (np.arange(0, fold_size * trial), np.arange(fold_size * (trial + 1), n_folds * fold_size))) train_set = X[active_set[train_set_indices]] test_set = X[active_set[np.arange(fold_size * trial, fold_size * (trial + 1))]] # learn location and covariance estimates from train set # for several amounts of shrinkage for i, shrinkage in enumerate(shrinkage_range): location = test_set.mean(0) cov = empirical_covariance(train_set) cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage # compute test data likelihood log_likelihoods[i, trial] = log_likelihood( empirical_covariance(test_set - location, assume_centered=True), pinvh(cov)) optimal_shrinkage = shrinkage_range[np.argmax( log_likelihoods.mean(1))] self.shrinkage = optimal_shrinkage * std_shrinkage self.shrinkage_cst = optimal_shrinkage if verbose: print "optimal shrinkage: %g (%g x lambda(= %g))" \ % (self.shrinkage, optimal_shrinkage, std_shrinkage) self.log_likelihoods = log_likelihoods self.shrinkage_range = shrinkage_range return shrinkage_range, log_likelihoods elif method == "oas": from sklearn.covariance import OAS rmcd = self.__init__(shrinkage=std_shrinkage) support = rmcd.fit(X).support_ oas = OAS().fit(X[support]) if oas.shrinkage_ == 1: self.shrinkage_cst = np.inf else: self.shrinkage_cst = oas.shrinkage_ / (1. 
- oas.shrinkage_) self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features elif method == "lw": from sklearn.covariance import LedoitWolf rmcd = RMCDl2(self, h=self.h, shrinkage=std_shrinkage) support = rmcd.fit(X).support_ lw = LedoitWolf().fit(X[support]) if lw.shrinkage_ == 1: self.shrinkage_cst = np.inf else: self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_) self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features else: pass return
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')
plt.ylabel("Squared error")
plt.legend(loc="upper right")
plt.title("Comparison of covariance estimators")
plt.xlim(5, 31)