Example #1
import numpy as np
from sklearn.covariance import OAS
from sklearn.mixture import GMM  # legacy API; removed in modern scikit-learn

def _gmm_from_memberships(data, memberships, covariance_type):
    clusters = set(memberships)
    n_clusters = len(clusters)
    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters])/n_clusters
    gmm.means_ = np.zeros([n_clusters, data.shape[1]]) 
    if covariance_type == 'diag':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'spherical':
        gmm.covars_ = np.zeros([n_clusters])
    if covariance_type == 'full':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1], data.shape[1]])

    for cluster in clusters:
        cluster = int(cluster)
        indices = (memberships == cluster)
        gmm.means_[cluster, :] = data[indices, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            #TODO Fix covariance calculation, for now, return cov=1
            #D = np.diag(np.cov(data[indices, :].T))
            D = np.ones([data.shape[1]])
            if covariance_type == 'spherical':
                gmm.covars_[cluster] = D.mean()
            else:
                gmm.covars_[cluster] = D
        if covariance_type == 'full':
            cov_estimator = OAS()
            cov_estimator.fit(data[indices, :])
            gmm.covars_[cluster] = cov_estimator.covariance_
    return gmm
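A minimal usage sketch for the helper above (hypothetical data; KMeans supplies the hard memberships, and the legacy GMM dependency means this is illustrative rather than runnable on current scikit-learn):

import numpy as np
from sklearn.cluster import KMeans

data = np.random.randn(200, 4)
memberships = KMeans(n_clusters=3, n_init=10).fit_predict(data)
# Seed a GMM from hard cluster assignments; EM can then refine it.
gmm = _gmm_from_memberships(data, memberships, covariance_type='full')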
Example #2
    def fit_base(self, X, y):
        """
        Fit the SLDA model to the base data.
        :param X: an Nxd torch tensor of base initialization data
        :param y: an Nx1-dimensional torch tensor of the associated labels for X
        :return: None
        """
        print('\nFitting Base...')
        X = X.to(self.device)
        y = y.squeeze()

        # update positive and negative means
        cls_ix = torch.arange(self.num_classes)
        for k in torch.unique(y):
            self.posW[k] = X[y == k].mean(0)
            self.posT[k] = X[y == k].shape[0]
        for k in cls_ix:
            self.negW[k] = X[y != k].mean(0)
            self.negT[k] = X[y != k].shape[0]
        self.num_updates = X.shape[0]

        print('\nEstimating initial covariance matrix...')
        from sklearn.covariance import OAS
        cov_estimator = OAS(assume_centered=True)
        cov_estimator.fit((X - self.posW[y]).cpu().numpy())
        self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
            self.device)

        print('\nBuilding initial OOD threshold(s)...')
        self.ood_predict(X, y)

        print('')
Example #4
from sklearn.covariance import OAS

def OAS_est(X):
    '''
    OAS coefficient estimate
    X_size = (n_samples, n_features)
    '''
    oa = OAS()
    cov_oa = oa.fit(X).covariance_

    return cov_oa
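A small usage sketch on synthetic data (assumed, for illustration), comparing the OAS estimate with the plain maximum-likelihood covariance:

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 10))        # (n_samples, n_features)
cov_oa = OAS_est(X)                  # shrunk toward a scaled identity
cov_emp = empirical_covariance(X)    # unregularized MLE, for comparison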
Example #5
    def _shrink_covariance(asset_returns):
        """
        Regularise/Shrink the asset covariances.

        :param asset_returns: (pd.DataFrame) Asset returns
        :return: (np.array) Shrunk covariance matrix of the asset returns
        """

        oas = OAS()
        oas.fit(asset_returns)
        shrinked_covariance = oas.covariance_
        return shrinked_covariance
Example #6
    def _shrink_covariance(covariance):
        """
        Regularise/Shrink the asset covariances.

        :param covariance: (pd.DataFrame) Asset returns covariances
        :return: (pd.DataFrame) Shrunk asset returns covariances
        """

        oas = OAS()
        oas.fit(covariance)
        shrinked_covariance = oas.covariance_
        return pd.DataFrame(shrinked_covariance, index=covariance.columns, columns=covariance.columns)
Example #7
    def shrinked_covariance(returns, price_data=False, shrinkage_type='basic', assume_centered=False,
                            basic_shrinkage=0.1):
        """
        Calculates the Covariance estimator with shrinkage for a dataframe of asset prices or returns.

        This function allows three types of shrinkage - Basic, Ledoit-Wolf and Oracle Approximating Shrinkage.
        It is a wrapper of sklearn's ShrunkCovariance, LedoitWolf and OAS classes. According to the
        scikit-learn User Guide on Covariance estimation:

        "Sometimes, it even occurs that the empirical covariance matrix cannot be inverted for numerical
        reasons. To avoid such an inversion problem, a transformation of the empirical covariance matrix
        has been introduced: the shrinkage. Mathematically, this shrinkage consists in reducing the ratio
        between the smallest and the largest eigenvalues of the empirical covariance matrix".

        Link to the documentation:
        https://scikit-learn.org/stable/modules/covariance.html

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimators class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns. (False by default)
        :param shrinkage_type: (str) Type of shrinkage to use. (``basic`` by default, ``lw``, ``oas``, ``all``)
        :param assume_centered: (bool) Flag for data with mean almost, but not exactly zero.
                                       (Read documentation for chosen shrinkage class, False by default)
        :param basic_shrinkage: (float) Between 0 and 1. Coefficient in the convex combination for basic shrinkage.
                                        (0.1 by default)
        :return: (np.array) Estimated covariance matrix. Tuple of covariance matrices if shrinkage_type = ``all``.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimators()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Calculating the covariance matrix for the chosen method
        if shrinkage_type == 'basic':
            cov_matrix = ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(
                returns).covariance_
        elif shrinkage_type == 'lw':
            cov_matrix = LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_
        elif shrinkage_type == 'oas':
            cov_matrix = OAS(assume_centered=assume_centered).fit(returns).covariance_
        else:
            cov_matrix = (
                ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(returns).covariance_,
                LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_,
                OAS(assume_centered=assume_centered).fit(returns).covariance_)

        return cov_matrix
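For reference, a sketch of what the wrapper delegates to for each shrinkage_type, calling the sklearn estimators directly on synthetic returns (assumed data, not part of the original library):

import numpy as np
from sklearn.covariance import ShrunkCovariance, LedoitWolf, OAS

returns = np.random.randn(250, 5)               # 250 observations, 5 assets
basic = ShrunkCovariance(shrinkage=0.1).fit(returns).covariance_
lw = LedoitWolf().fit(returns).covariance_      # shrinkage chosen analytically
oas_cov = OAS().fit(returns).covariance_        # shrinkage via the OAS formula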
Example #8
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas
    """

    ts = do_mask_img(masker, func, confound)

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()


    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example #9
    def compute(self, context, data, stocks):
        prices = data.history(stocks, "price", 200, "1d")
        prices = prices.dropna(axis=1)
        returns = prices.pct_change().dropna().values
        returns = returns * 1000.
        cov = OAS().fit(returns).covariance_
        e, v = np.linalg.eig(cov)
        idx = e.argsort()
        comp = v[:, idx[-15:]]

        if comp[0, 0] < 0:
            comp *= -1

        sources = np.dot(returns, comp)
        betas = np.zeros((np.shape(returns)[1], np.shape(sources)[1]))

        for i in range(0, np.shape(returns)[1]):
            model = LassoCV().fit(sources, returns[:, i])
            betas[i, :] = model.coef_

        W = getweights(betas, cov, np.asarray([1.] * np.shape(returns)[1]))
        self.prices = prices.values[0, :]
        pvalue = np.dot(prices.values, W / self.prices)
        self.mean = np.mean(pvalue)
        self.std = np.std(pvalue)
        self.signal = (pvalue[-1] - self.mean) / self.std
        abspval = np.sum(np.abs(W))

        if abs(self.signal) < .5:
            return False, None, None, None
        return True, W, abspval, prices
Example #10
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas
    """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()


    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example #11
File: covar.py Project: dan0712/django-app
def lw_covars(returns):
    """
    Calculates a constrained covariance matrix between the returns.
    :return: A pandas dataframe of the covariance between the returns
    """
    co_vars = returns.cov() * WEEKDAYS_PER_YEAR

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Calcing covars as table: {}".format(
            returns.to_dict('list')))

    # Shrink the covars (shrinkage intensity from Oracle Approximating Shrinkage)
    sk = OAS(assume_centered=True)
    sk.fit(returns.values)
    return (1 - sk.shrinkage_) * co_vars + sk.shrinkage_ * np.trace(
        co_vars) / len(co_vars) * np.identity(len(co_vars))
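The closing expression is the standard convex shrinkage blend, which on a plain numpy covariance coincides with sklearn's shrunk_covariance helper. A sketch with assumed data (the original applies the OAS intensity to an annualized pandas covariance instead):

import numpy as np
from sklearn.covariance import OAS, shrunk_covariance

returns = np.random.randn(100, 4)
co_vars = np.cov(returns, rowvar=False)
s = OAS(assume_centered=True).fit(returns).shrinkage_
blend = (1 - s) * co_vars + s * np.trace(co_vars) / len(co_vars) * np.identity(len(co_vars))
assert np.allclose(blend, shrunk_covariance(co_vars, shrinkage=s))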
Example #12
    def _get_omega(self, returns):
        """
        Get robust covariance matrix for use in Newton solver.
        
        Parameters
        ----------
        returns: numpy array of return data
        
        Returns
        ----------
        omega: array of shape nxn where n is equal to the number of
               securities involved
        """
        corr_returns = returns[-self.corr_window:, :]
        cov_returns = returns[-self.cov_window:, :]

        if self.cov_est == 'oas':
            omega = OAS().fit(corr_returns).covariance_ * 10**4
        elif self.cov_est == 'empirical':
            omega = EmpiricalCovariance().fit(corr_returns).covariance_ * 10**4
        else:
            corr = np.corrcoef(corr_returns, rowvar=False)
            cov_diag = np.diag(np.sqrt(np.var(cov_returns, axis=0)))
            omega = cov_diag @ corr @ cov_diag
            if self.lw_shrink is None:
                lw = ledoit_wolf(corr_returns)[1]
                omega = shrunk_covariance(omega, shrinkage=lw) * 10**4
            else:
                omega = shrunk_covariance(omega,
                                          shrinkage=self.lw_shrink) * 10**4
        return omega
Example #13
def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """

    :param data: Data array to fit the log-normal model to
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default): empirical covariance
        - Diagonal: diagonal matrix of per-variable variances
        - ShrinkageLedoitWolf: Ledoit-Wolf shrinkage estimate
        - ShrinkageOAS: Oracle Approximating Shrinkage estimate
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and its empirical correlation matrix
    """

    try:
        # Offset data to make sure there are no 0 values for log transform
        offset = np.min(data) + 1
        offdata = data + offset

        # log on the offsetted data
        logdata = np.log(offdata)
        # Get the means
        meanslog = np.mean(logdata, axis=0)

        # Specify covariance
        # Regular covariance estimator
        if covtype == "Estimate":
            covlog = np.cov(logdata, rowvar=0)
        # Shrinkage covariance estimator, using LedoitWolf
        elif covtype == "ShrinkageLedoitWolf":
            scov = LedoitWolf()
            scov.fit(logdata)
            covlog = scov.covariance_
        elif covtype == "ShrinkageOAS":
            scov = OAS()
            scov.fit(logdata)
            covlog = scov.covariance_

        # Diagonal covariance matrix (no between variable correlation)
        elif covtype == "Diagonal":
            covlogdata = np.var(
                logdata, axis=0)  #get variance of log data by each column
            covlog = np.diag(
                covlogdata
            )  #generate a matrix with diagonal of variance of log Data
        else:
            raise ValueError('Unknown Covariance type')

        simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
        simData = np.exp(simData)
        simData -= offset

        # Set negative values to 0
        simData[np.where(simData < 0)] = 0
        # work out the correlation matrix column-wise; each column is a variable
        corrMatrix = np.corrcoef(simData, rowvar=0)

        return simData, corrMatrix

    except Exception as exp:
        raise exp
Example #14
 def _get_coef(self, code_list):
     # Provide the parameters that _calc_weights needs for its calculation
     w.start()
     return_value = np.array(w.wsd(code_list, "pct_chg", "ED-" + str(self.N - 1) + "TD", self.date, "").Data)
     from sklearn.covariance import OAS
     return_cov = OAS().fit(return_value.transpose())
     return_cov = return_cov.covariance_
     return return_cov
Example #15
def plot_lda():
    n_train = 20  # samples for training
    n_test = 200  # samples for testing
    n_averages = 50  # how often to repeat classification
    n_features_max = 75  # maximum number of features
    step = 4  # step size for the calculation

    acc_clf1, acc_clf2, acc_clf3 = [], [], []
    n_features_range = range(1, n_features_max + 1, step)
    for n_features in n_features_range:
        score_clf1, score_clf2, score_clf3 = 0, 0, 0
        for _ in range(n_averages):
            X, y = generate_data(n_train, n_features)

            clf1 = LinearDiscriminantAnalysis(solver='lsqr',
                                              shrinkage='auto').fit(X, y)
            clf2 = LinearDiscriminantAnalysis(solver='lsqr',
                                              shrinkage=None).fit(X, y)
            oa = OAS(store_precision=False, assume_centered=False)
            clf3 = LinearDiscriminantAnalysis(solver='lsqr',
                                              covariance_estimator=oa).fit(
                                                  X, y)

            X, y = generate_data(n_test, n_features)
            score_clf1 += clf1.score(X, y)
            score_clf2 += clf2.score(X, y)
            score_clf3 += clf3.score(X, y)

        acc_clf1.append(score_clf1 / n_averages)
        acc_clf2.append(score_clf2 / n_averages)
        acc_clf3.append(score_clf3 / n_averages)

    features_samples_ratio = np.array(n_features_range) / n_train

    plt.plot(features_samples_ratio,
             acc_clf1,
             linewidth=2,
             label="Linear Discriminant Analysis with Ledoit Wolf",
             color='navy')
    plt.plot(features_samples_ratio,
             acc_clf2,
             linewidth=2,
             label="Linear Discriminant Analysis",
             color='gold')
    plt.plot(features_samples_ratio,
             acc_clf3,
             linewidth=2,
             label="Linear Discriminant Analysis with OAS",
             color='red')

    plt.xlabel('n_features / n_samples')
    plt.ylabel('Classification accuracy')

    plt.legend(loc=3, prop={'size': 12})
    plt.suptitle('Linear Discriminant Analysis vs. ' + '\n' +
                 'Shrinkage Linear Discriminant Analysis vs. ' + '\n' +
                 'OAS Linear Discriminant Analysis (1 discriminative feature)')
    plt.show()
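plot_lda relies on a generate_data helper that this snippet omits; a sketch consistent with the fragment visible in Example #30 below and with the scikit-learn shrinkage example this appears to derive from (the make_blobs centers are an assumption):

import numpy as np
from sklearn.datasets import make_blobs

def generate_data(n_samples, n_features):
    # One discriminative feature plus pure-noise padding features.
    X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])
    if n_features > 1:
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y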
Example #16
 def _get_cov(self):
     w.start()
     return_value = np.array(
         w.wsd(self.code_list, "pct_chg",
               "ED-" + str(self.N_days - 1) + "TD", self.date, "").Data)
     from sklearn.covariance import OAS
     return_cov = OAS().fit(return_value.transpose())
     return_cov = return_cov.covariance_
     return return_cov
Example #17
    def fit_base(self, X, y):
        """
        Fit the SLDA model to the base data.
        :param X: an Nxd torch tensor of base initialization data
        :param y: an Nx1-dimensional torch tensor of the associated labels for X
        :return: None
        """
        print('\nFitting Base...')

        # update class means
        for k in torch.unique(y):
            self.muK[k] = X[y == k].mean(0)
            self.cK[k] = X[y == k].shape[0]
        self.num_updates = X.shape[0]

        print('\nEstimating initial covariance matrix...')
        from sklearn.covariance import OAS
        cov_estimator = OAS(assume_centered=True)
        cov_estimator.fit((X - self.muK[y]).cpu().numpy())
        self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
            self.device)
Example #18
from sklearn.covariance import (GraphicalLasso, LedoitWolf, MinCovDet,
                                OAS, ShrunkCovariance)

def get_cov_estimator(cov_type):
    if cov_type == 'LW':
        model = LedoitWolf()
    elif cov_type == 'OAS':
        model = OAS()
    elif cov_type == 'MCD':
        model = MinCovDet()
    elif cov_type[:2] == 'SC':
        shrinkage = float(cov_type.split('_')[1])
        model = ShrunkCovariance(shrinkage=shrinkage)
    elif cov_type[:2] == 'GL':
        alpha = float(cov_type.split('_')[1])
        model = GraphicalLasso(alpha=alpha)
    else:
        raise ValueError('Unknown cov_type: {!r}'.format(cov_type))
    return model
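Hypothetical usage of the factory above; the token after the underscore is parsed as the estimator's parameter:

import numpy as np

X = np.random.randn(100, 5)            # assumed data
get_cov_estimator('OAS').fit(X)        # Oracle Approximating Shrinkage
get_cov_estimator('SC_0.2').fit(X)     # ShrunkCovariance(shrinkage=0.2)
get_cov_estimator('GL_0.05').fit(X)    # GraphicalLasso(alpha=0.05)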
Example #19
import pandas as pd
from scipy.stats import norm, truncnorm
from sklearn.covariance import OAS

def correlations(df, categorical_portions):
    # The more samples, the slower, but the more accurate the categorical correlation
    NUM_CATEGORICAL_SAMPLES = 5
    for i in range(NUM_CATEGORICAL_SAMPLES):
        df = pd.concat([df, df], ignore_index=True)
    categorical_cols = list(categorical_portions.keys())
    # First generate continuous samples for categorical values. We do this by sampling from
    # a truncated normal distribution in the range for that continuous variable.
    for categorical_col in categorical_cols:
        portions = categorical_portions[categorical_col]
        # The values of the categorical variable in order
        portions_keys = [val for val, frac in portions]
        for i, cat_val in enumerate(df[categorical_col]):
            if len(portions) == 1:
                # Normal sample
                df.loc[i, categorical_col] = norm.rvs()
                continue
            ind = portions_keys.index(cat_val)
            # Get sums of prev portions including and not including this portion
            sum_a = sum(map(lambda i: portions[i][1], range(ind)))
            sum_b = sum_a + portions[ind][1]
            # Get thresholds
            threshold_a = norm.ppf(sum_a, loc=0.0, scale=1.0)
            threshold_b = norm.ppf(sum_b, loc=0.0, scale=1.0)
            # Sample truncated norm
            df.loc[i, categorical_col] = truncnorm.rvs(threshold_a,
                                                       threshold_b)

    # estimate covariance matrix
    estimator = OAS()
    estimator.fit(df.values)

    cov = pd.DataFrame(estimator.covariance_,
                       index=df.columns,
                       columns=df.columns)
    return cov
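A hypothetical call; the shape of categorical_portions (an ordered list of (value, fraction) pairs per categorical column) is inferred from the loop above:

import pandas as pd

df = pd.DataFrame({'height': [1.2, 1.5, 1.1, 1.7],
                   'size': ['S', 'M', 'S', 'L']})
portions = {'size': [('S', 0.5), ('M', 0.25), ('L', 0.25)]}
cov = correlations(df, portions)       # OAS covariance as a DataFrame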
Example #20
    def get_stages(self):
        from sklearn.covariance import (EmpiricalCovariance, EllipticEnvelope,
                                        LedoitWolf, MinCovDet, OAS,
                                        ShrunkCovariance)
        from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
        from srom.anomaly_detection.generalized_anomaly_model import GeneralizedAnomalyModel
        from srom.utils.no_op import NoOp

        return [
            [('skipscaling', NoOp()), ('standardscaler', StandardScaler()),
             ('robustscaler', RobustScaler()),
             ('minmaxscaling', MinMaxScaler())],
            [
                # Covariance Structure based Anomaly Models
                ('empiricalcovariance',
                 GeneralizedAnomalyModel(base_learner=EmpiricalCovariance(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
                ('ellipticenvelope',
                 GeneralizedAnomalyModel(base_learner=EllipticEnvelope(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
                ('ledoitwolf',
                 GeneralizedAnomalyModel(base_learner=LedoitWolf(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
                ('mincovdet',
                 GeneralizedAnomalyModel(base_learner=MinCovDet(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
                ('oas',
                 GeneralizedAnomalyModel(base_learner=OAS(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
                ('shrunkcovariance',
                 GeneralizedAnomalyModel(base_learner=ShrunkCovariance(),
                                         fit_function='fit',
                                         predict_function='mahalanobis',
                                         score_sign=1)),
            ]
        ]
Example #21
    def __init__(self, template, shrinkage='oas', center=True, cov_i=None):
        self.template = np.asarray(template).ravel()[:, np.newaxis]
        self.center = center

        if center:
            self.template -= self.template.mean()

        if shrinkage == 'oas':
            self.cov = OAS()
        elif shrinkage == 'lw':
            self.cov = LedoitWolf()
        elif shrinkage == 'none':
            self.cov = EmpiricalCovariance()
        elif isinstance(shrinkage, (int, float)):
            self.cov = ShrunkCovariance(shrinkage=shrinkage)
        else:
            raise ValueError('Invalid value for shrinkage parameter.')

        self.cov_i = cov_i
Example #22
    def fit(self, data, dataset):
        """
        data: array, data to fit cov on
        dataset: string, nametag of the data (e.g. 'mnist')
        """

        self.dataset = dataset
        self.mean = np.expand_dims(data.mean(axis=0), 0)

        if self.mode == 'ML':
            self.cov = EmpiricalCovariance().fit(data).covariance_
        elif self.mode == 'OAS':
            self.cov = OAS().fit(data).covariance_
        elif self.mode == 'LW':
            self.cov = LedoitWolf().fit(data).covariance_
        elif self.mode == 'NERCOME':
            self.cov = self.nercome_estimator(data)
        else:
            raise ValueError('Unknown covariance mode: {!r}'.format(self.mode))

        return True
Example #23
def compute_connectivity_voxel(roi, voxel, conn):
    """ Returns connectivity of one voxel for a given roi
    """

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    ts = np.array([roi, voxel]).T

    if conn == 'corr' or conn == 'pcorr':
        cov = np.corrcoef(ts)[0, 1]
    else:
        fc.fit(ts)
        cov = fc.covariance_[0, 0]

    return cov
Example #24
from sklearn.covariance import (EmpiricalCovariance, LedoitWolf,
                                MinCovDet, OAS)

def covariance_estimator(matrix,
                         method='ledoit-wolf',
                         assume_centered=True,
                         store_precision=True,
                         **kwargs):
    """
    Return a pre-fit estimator for covariance from one of the scikit-learn estimators

    :param matrix: matrix to fit covariance to
    :param method: method one of `SUPPORTED_SKLEARN_COVARIANCE_ESTIMATORS`
    :param assume_centered: whether to assume data to be centered
    :param store_precision: if true, computes precision matrix (i.e. the inverse covariance) too
    :param kwargs: other kwargs to pass to estimator
    :return:
    """
    estimator = None

    if method == 'ledoit-wolf':
        estimator = LedoitWolf(assume_centered=assume_centered,
                               store_precision=store_precision,
                               **kwargs)
    elif method == 'oas':
        estimator = OAS(assume_centered=assume_centered,
                        store_precision=store_precision,
                        **kwargs)
    elif method == 'mincovdet':
        estimator = MinCovDet(assume_centered=assume_centered,
                              store_precision=store_precision,
                              **kwargs)
    elif method == 'empirical':
        estimator = EmpiricalCovariance(assume_centered=assume_centered,
                                        store_precision=store_precision,
                                        **kwargs)
    else:
        raise Exception('Unsupported estimator method {!r}'.format(method))

    estimator.fit(matrix.T)

    return estimator
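A sketch of a call with assumed data; note the estimator is fit on matrix.T, so the rows of matrix are the variables whose covariance is estimated:

import numpy as np

matrix = np.random.randn(10, 500)      # 10 variables observed 500 times
est = covariance_estimator(matrix, method='oas', assume_centered=False)
cov = est.covariance_                  # (10, 10) covariance
prec = est.precision_                  # its inverse, since store_precision=True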
Example #25
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2, 1, 1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
pl.ylabel("Squared error")
pl.legend(loc="upper right")
pl.title("Comparison of covariance estimators")
pl.xlim(5, 31)
Example #26
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Example #27
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Example #28
def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr


if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  #.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2, 2, i + 1)
Example #29
print(timecourse_files)

# roll through the subjects
print(np.shape(timecourse_data)[0])
for i in range(np.shape(timecourse_data)[0]):
#for i in range(10):

    print(i)

    # extract the timecourses for this subject
    subject_timecourses = timecourse_data[i, : ,:]
    #print np.shape(subject_timecourses)
    
    # calculate Pearson covariance
    X = scale(subject_timecourses, axis=1)
    cov = np.dot(X, np.transpose(X)) / np.shape(X)[1]
    print(cov[:5, :5])
    print(logm(cov)[:5, :5])
    
    # calculate the shrunk (OAS) covariance matrix
    model = OAS(store_precision=False, assume_centered=True)
    model.fit(np.transpose(X))
    cov = model.covariance_
    OAS_matrices[i, :] = np.reshape(cov, (1, 8100))
    #print cov[:5, :5]
    foo = logm(cov)
    #print logm(cov[:5, :5])
    
    
## save the data
np.savetxt('/home/jonyoung/IoP_data/Data/connectivity_data/OAS_data.csv', OAS_matrices, delimiter=',')
Example #30
    # add non-discriminative features
    if n_features > 1:
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y


acc_clf1, acc_clf2, acc_clf3 = [], [], []
n_features_range = range(1, n_features_max + 1, step)
for n_features in n_features_range:
    score_clf1, score_clf2, score_clf3 = 0, 0, 0
    for _ in range(n_averages):
        X, y = generate_data(n_train, n_features)

        clf1 = LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto").fit(X, y)
        clf2 = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=None).fit(X, y)
        oa = OAS(store_precision=False, assume_centered=False)
        clf3 = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=oa).fit(
            X, y
        )

        X, y = generate_data(n_test, n_features)
        score_clf1 += clf1.score(X, y)
        score_clf2 += clf2.score(X, y)
        score_clf3 += clf3.score(X, y)

    acc_clf1.append(score_clf1 / n_averages)
    acc_clf2.append(score_clf2 / n_averages)
    acc_clf3.append(score_clf3 / n_averages)

features_samples_ratio = np.array(n_features_range) / n_train
Example #31
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
Example #32
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
    log_likelihood, empirical_covariance

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(
        X_train, assume_centered=True).score(X_test, assume_centered=True) \
                         for s in shrinkages]

# getting the likelihood under the real model
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

###############################################################################
Example #33
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Example #35
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")

plt.plot(plt.xlim(), 2 * [loglik_real], '--r',
         label="Real covariance likelihood")

# adjust view
Example #36
    args = parser.parse_args()

    if args.verbose:
        print('sys.argv:')
        print(sys.argv)
        print()
        print('numpy version:', np.__version__)
        print('pandas version:', pd.__version__)
        print('scipy version:', sp.__version__)
        print()

    gene_expr_raw = pd.read_table(args.data)
    gene_expr = gene_expr_raw.T

    X_centered = (gene_expr - gene_expr.mean()) / np.sqrt(gene_expr.var())
    oa = OAS(store_precision=True, assume_centered=True)
    gene_expr_OAS_corr = oa.fit(X_centered)

    n_genes = gene_expr_OAS_corr.covariance_.shape[1]
    g = Graph(directed=False)
    g.add_vertex(n=n_genes)

    spearman = g.new_ep("double", 0)
    pval = g.new_ep("double", 0)
    genes = g.new_vertex_property(
        "string", np.array(np.array(gene_expr.columns, dtype="str")))
    g.vertex_properties["genes"] = genes

    for i in range(n_genes):
        for j in range(i):
            spearman_r = sp.stats.spearmanr(X_centered.iloc[:, i],
Example #37
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(
    n_samples_range,
    lw_mse.mean(1),
    yerr=lw_mse.std(1),
    label="Ledoit-Wolf",
    color="navy",
    lw=2,
)
plt.errorbar(
Example #38
    def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False):
        """Set optimal shrinkage amount according to chosen method.

        /!\ Could be rewritten with GridSearchCV.

        Parameters
        ----------
        X: array-like, shape = [n_samples, n_features]
          Training data, where n_samples is the number of samples
          and n_features is the number of features.
        method: float or str in {"cv", "lw", "oas"},
          The method used to set the shrinkage. If a floating value is provided
          that value is used. Otherwise, the selection is made according to
          the selected method.
          "cv" (default): 10-fold cross-validation.
                          (or Leave-One Out cross-validation if n_samples < 10)
          "lw": Ledoit-Wolf criterion
          "oas": OAS criterion
        verbose: bool,
          Verbose mode or not.

        Returns
        -------
        optimal_shrinkage: float,
          The optimal amount of shrinkage.

        """
        n_samples, n_features = X.shape
        if isinstance(method, str):
            std_shrinkage = np.trace(empirical_covariance(X)) / \
                (n_features * n_samples)
            self.std_shrinkage = std_shrinkage
        if method == "cv":
            from sklearn.covariance import log_likelihood
            n_samples, n_features = X.shape
            shrinkage_range = np.concatenate(
                ([0.], 10.**np.arange(-n_samples / n_features, -1, 0.5),
                 np.arange(0.05, 1., 0.05), np.arange(1., 20., 1.),
                 np.arange(20., 100, 5.), 10.**np.arange(2, 7, 0.5)))
            # get a "pure" active set with a standard shrinkage
            active_set_estimator = RMCDl2(shrinkage=std_shrinkage)
            active_set_estimator.fit(X)
            active_set = np.where(active_set_estimator.support_)[0]
            # split this active set in ten parts
            active_set = active_set[np.random.permutation(active_set.size)]
            if active_set.size >= 10:
                # ten fold cross-validation
                n_folds = 10
                fold_size = active_set.size // 10
            else:
                n_folds = active_set.size
                fold_size = 1
            log_likelihoods = np.zeros((shrinkage_range.size, n_folds))
            if verbose:
                print("*** Cross-validation")
            for trial in range(n_folds):
                if verbose:
                    print(trial / float(n_folds))
                # define train and test sets
                train_set_indices = np.concatenate(
                    (np.arange(0, fold_size * trial),
                     np.arange(fold_size * (trial + 1), n_folds * fold_size)))
                train_set = X[active_set[train_set_indices]]
                test_set = X[active_set[np.arange(fold_size * trial,
                                                  fold_size * (trial + 1))]]
                # learn location and covariance estimates from train set
                # for several amounts of shrinkage
                for i, shrinkage in enumerate(shrinkage_range):
                    location = test_set.mean(0)
                    cov = empirical_covariance(train_set)
                    cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage
                    # compute test data likelihood
                    log_likelihoods[i, trial] = log_likelihood(
                        empirical_covariance(test_set - location,
                                             assume_centered=True), pinvh(cov))
            optimal_shrinkage = shrinkage_range[np.argmax(
                log_likelihoods.mean(1))]
            self.shrinkage = optimal_shrinkage * std_shrinkage
            self.shrinkage_cst = optimal_shrinkage
            if verbose:
                print("optimal shrinkage: %g (%g x lambda(= %g))"
                      % (self.shrinkage, optimal_shrinkage, std_shrinkage))
            self.log_likelihoods = log_likelihoods
            self.shrinkage_range = shrinkage_range

            return shrinkage_range, log_likelihoods
        elif method == "oas":
            from sklearn.covariance import OAS
            rmcd = RMCDl2(shrinkage=std_shrinkage)
            support = rmcd.fit(X).support_
            oas = OAS().fit(X[support])
            if oas.shrinkage_ == 1:
                self.shrinkage_cst = np.inf
            else:
                self.shrinkage_cst = oas.shrinkage_ / (1. - oas.shrinkage_)
            self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
        elif method == "lw":
            from sklearn.covariance import LedoitWolf
            rmcd = RMCDl2(h=self.h, shrinkage=std_shrinkage)
            support = rmcd.fit(X).support_
            lw = LedoitWolf().fit(X[support])
            if lw.shrinkage_ == 1:
                self.shrinkage_cst = np.inf
            else:
                self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_)
            self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
        else:
            pass
        return
Example #40
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')
plt.ylabel("Squared error")
plt.legend(loc="upper right")
plt.title("Comparison of covariance estimators")
plt.xlim(5, 31)