Example no. 1
import numpy as np
from sklearn.covariance import LedoitWolf, OAS


def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """

    :param data:
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default):
        - Diagonal:
        - Shrinkage OAS:
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and empirical covariance est
    """

    try:
        # Offset data to make sure there are no 0 values for log transform
        offset = np.min(data) + 1
        offdata = data + offset

        # log on the offsetted data
        logdata = np.log(offdata)
        # Get the means
        meanslog = np.mean(logdata, axis=0)

        # Specify covariance
        # Regular covariance estimator
        if covtype == "Estimate":
            covlog = np.cov(logdata, rowvar=False)
        # Shrinkage covariance estimator, using LedoitWolf
        elif covtype == "ShrinkageLedoitWolf":
            scov = LedoitWolf()
            scov.fit(logdata)
            covlog = scov.covariance_
        elif covtype == "ShrinkageOAS":
            scov = OAS()
            scov.fit(logdata)
            covlog = scov.covariance_

        # Diagonal covariance matrix (no between variable correlation)
        elif covtype == "Diagonal":
            # Variance of the log data for each column
            covlogdata = np.var(logdata, axis=0)
            # Diagonal matrix built from the per-column variances
            covlog = np.diag(covlogdata)
        else:
            raise ValueError('Unknown Covariance type')

        simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
        simData = np.exp(simData)
        simData -= offset

        # Set negative values to 0
        simData[simData < 0] = 0
        # Correlation matrix of the simulated data; each column is a variable
        corrMatrix = np.corrcoef(simData, rowvar=False)

        return simData, corrMatrix

    except Exception:
        # Re-raise with the original traceback preserved
        raise
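
A minimal usage sketch for the function above. The toy data is hypothetical and only needs to be strictly positive so the offset/log round trip is well behaved:

import numpy as np

# Hypothetical toy input: 100 samples of 3 positive variables
rng = np.random.default_rng(0)
data = rng.lognormal(mean=0.0, sigma=0.5, size=(100, 3))

sim, corr = simulateLogNormal(data, covtype='ShrinkageLedoitWolf', nsamples=500)
print(sim.shape)   # (500, 3)
print(corr.shape)  # (3, 3)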
Example no. 3
def test_ledoit_wolf_small():
    # Compare our blocked implementation to the naive implementation
    X_small = X[:, :4]
    lw = LedoitWolf()
    lw.fit(X_small)
    shrinkage_ = lw.shrinkage_

    assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
Example no. 4
File: ctm.py Project: happyche/code
	def maximization(self):
		# mean maximization
		for i in range(self._K):
			mu[i] = mu_ss[i] / ndata_ss
		# covariance maximization
		for i in range(self._K):
			for j in range(self._K):
				cov[i,j] = (1.0/ ndata_ss) * cov_ss[i,j] + ndata_ss * mu[i] * mu[j] - mu_ss[i] * mu[j] - mu_ss[j] * mu[i]
		# covariance shrinkage
		lw = LedoitWolf(assume_centered=True)
		cov_result = lw.fit(cov).covariance_
		inv_cov = np.linalg.inv(cov_result)
		log_det_inv_cov = np.log(np.linalg.det(inv_cov))

		# topic maximization
		for i in range(self._K):
			sum_m = 0 
			for j in range(self._W):
				sum_m += beta_ss[i,j]

			if sum_m == 0:
				sum_m = -1000 * self._W
			else:
				sum_m = np.log(sum_m)
			for j in range(self._W):
				# Normalize in log space: log_beta = log(beta_ss) - log(row sum)
				log_beta[i,j] = np.log(beta_ss[i,j]) - sum_m
Example no. 5
def LW_est(X):
    '''
    Ledoit-Wolf optimal shrinkage coefficient estimate
    X_size = (n_samples, n_features)
    '''

    lw = LedoitWolf()
    cov_lw = lw.fit(X).covariance_

    return cov_lw
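
A quick sketch of calling LW_est (assuming `from sklearn.covariance import LedoitWolf`, which the excerpt omits):

import numpy as np
from sklearn.covariance import LedoitWolf

X = np.random.randn(50, 10)  # 50 samples, 10 features
cov_lw = LW_est(X)
print(cov_lw.shape)          # (10, 10)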
Example no. 6
def covarianceEstimation(daily_returns, cov_estimator):
    lw = LedoitWolf()
    if cov_estimator == "shrinkage":
        return lw.fit(daily_returns).covariance_
    elif cov_estimator == "empirical":
        return daily_returns.cov()
    elif cov_estimator == "multifactor":
        # FIXME
        return None
    else:
        raise Exception("协方差矩阵类型为[shrinkage,empirical,multifactor]")
Example no. 7
def partial_corrconn(activity_matrix,
                     estimator='EmpiricalCovariance',
                     target_ts=None):
    """
    activity_matrix:    Activity matrix should be nodes X time
    target_ts:             Optional, used when only a single target time series (returns 1 X nnodes matrix)
    estimator:      can be either 'Empirical covariance' the default, or 'LedoitWolf' partial correlation with Ledoit-Wolf shrinkage

    Output: connectivity_mat, formatted targets X sources
    Credit goes to nilearn connectivity_matrices.py which contains code that was simplified for this use.
    """

    nnodes = activity_matrix.shape[0]
    timepoints = activity_matrix.shape[1]
    if nnodes > timepoints:
        print('activity_matrix shape: ', np.shape(activity_matrix))
        raise Exception(
            'More nodes (regressors) than timepoints! Use regularized regression'
        )
    if 2 * nnodes > timepoints:
        print('activity_matrix shape: ', np.shape(activity_matrix))
        print('Consider using a shrinkage method')

    if target_ts is None:
        connectivity_mat = np.zeros((nnodes, nnodes))
        # calculate covariance
        if estimator == 'LedoitWolf':
            cov_estimator = LedoitWolf(store_precision=False)
        elif estimator == 'EmpiricalCovariance':
            cov_estimator = EmpiricalCovariance(store_precision=False)
        covariance = cov_estimator.fit(activity_matrix.T).covariance_

        # calculate precision
        precision = linalg.inv(covariance)

        # precision to partial corr
        diagonal = np.atleast_2d(1. / np.sqrt(np.diag(precision)))
        correlation = precision * diagonal * diagonal.T

        # Force exact 0. on diagonal
        np.fill_diagonal(correlation, 0.)
        connectivity_mat = -correlation
    else:
        #Computing values for a single target node
        connectivity_mat = np.zeros((nnodes, 1))
        X = activity_matrix.T
        y = target_ts
        #Note: LinearRegression fits intercept by default (intercept beta not included in coef_ output)
        reg = LinearRegression().fit(X, y)
        connectivity_mat = reg.coef_

    return connectivity_mat
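
A sketch of the full-matrix path (assuming the numpy, scipy.linalg, sklearn.covariance and LinearRegression imports the excerpt omits); only shapes are checked here:

import numpy as np

# Hypothetical data: 20 nodes, 500 timepoints (nodes x time)
activity = np.random.randn(20, 500)
pcorr = partial_corrconn(activity, estimator='LedoitWolf')
print(pcorr.shape)  # (20, 20), targets x sources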
Example no. 8
def covariance_estimator(matrix,
                         method='ledoit-wolf',
                         assume_centered=True,
                         store_precision=True,
                         **kwargs):
    """
    Return a pre-fit estimator for covariance from one of the scikit-learn estimators

    :param matrix: matrix to fit covariance to
    :param method: method one of `SUPPORTED_SKLEARN_COVARIANCE_ESTIMATORS`
    :param assume_centered: whether to assume data to be centered
    :param store_precision: if true, computes precision matrix (i.e. the inverse covariance) too
    :param kwargs: other kwargs to pass to estimator
    :return:
    """
    estimator = None

    if method == 'ledoit-wolf':
        estimator = LedoitWolf(assume_centered=assume_centered,
                               store_precision=store_precision,
                               **kwargs)
    elif method == 'oas':
        estimator = OAS(assume_centered=assume_centered,
                        store_precision=store_precision,
                        **kwargs)
    elif method == 'mincovdet':
        estimator = MinCovDet(assume_centered=assume_centered,
                              store_precision=store_precision,
                              **kwargs)
    elif method == 'empirical':
        estimator = EmpiricalCovariance(assume_centered=assume_centered,
                                        store_precision=store_precision,
                                        **kwargs)
    else:
        raise Exception('Unsupported method {!r}'.format(method))

    estimator.fit(matrix.T)

    return estimator
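
A small sketch; since the function fits on `matrix.T`, the input is oriented features x samples here:

import numpy as np

matrix = np.random.randn(8, 200)  # 8 features, 200 samples
est = covariance_estimator(matrix, method='oas', assume_centered=False)
print(est.covariance_.shape)  # (8, 8)
print(est.precision_.shape)   # (8, 8), available because store_precision=True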
Example no. 9
def max_IC_weight(ic_df,
                  factors_dict,
                  holding_period,
                  covariance_type="shrink"):
    """
    输入ic_df(ic值序列矩阵),指定持有期和滚动窗口,给出相应的多因子组合权重
    :param factors_dict: 若干因子组成的字典(dict),形式为:
                         {"factor_name_1":factor_1,"factor_name_2":factor_2}
                        每个因子值格式为一个pd.DataFrame,索引(index)为date,column为asset
    :param ic_df: ic值序列矩阵 (pd.Dataframe),索引(index)为datetime,columns为各因子名称。
             如:

                       BP	   CFP	   EP	  ILLIQUIDITY	REVS20	   SRMI	   VOL20
            date
            2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832	0.214377	0.068445
            2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890	0.202724	0.081748
            2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691	0.122554	0.042489
            2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805	0.053339	0.079592
            2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902	0.077293	-0.050667

    :param holding_period: 持有周期(int)
    :param covariance_type:"shrink"/"simple" 协防差矩阵估算方式 Ledoit-Wolf压缩估计或简单估计
    :return: weight_df:使用Sample协方差矩阵估算方法得到的因子权重(pd.Dataframe),
             索引(index)为datetime,columns为待合成的因子名称。
    """
    weight_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
    lw = LedoitWolf()
    # Maximizing day t's IC uses data up to t+period (for computing returns);
    # the resulting weights are used to weight the factors at t+period
    for dt in ic_df.index:
        f_dt = pd.concat([
            factors_dict[factor_name].loc[dt] for factor_name in ic_df.columns
        ],
                         axis=1).dropna()
        if len(f_dt) == 0:
            continue
        if covariance_type == "shrink":
            try:
                f_cov_mat = lw.fit(f_dt.as_matrix()).covariance_
            except:
                f_cov_mat = np.mat(np.cov(f_dt.T.as_matrix()).astype(float))
        else:
            f_cov_mat = np.mat(np.cov(f_dt.T.as_matrix()).astype(float))
        inv_f_cov_mat = np.linalg.inv(f_cov_mat)
        weight = inv_f_cov_mat * np.mat(ic_df.loc[dt].values).reshape(
            len(inv_f_cov_mat), 1)
        weight = np.array(weight.reshape(len(weight), ))[0]
        weight_df.ix[dt] = weight / np.sum(np.abs(weight))

    return weight_df.shift(holding_period)
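
A hedged end-to-end sketch on synthetic inputs; the dates, factor names, and IC values are all made up and only exercise the shapes the function expects:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
dates = pd.date_range('2016-06-01', periods=30)
assets = ['s%d' % i for i in range(5)]
factors_dict = {name: pd.DataFrame(rng.normal(size=(30, 5)),
                                   index=dates, columns=assets)
                for name in ['BP', 'EP']}
ic_df = pd.DataFrame(rng.normal(0.05, 0.1, size=(30, 2)),
                     index=dates, columns=['BP', 'EP'])
weight_df = max_IC_weight(ic_df, factors_dict, holding_period=5)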
Example no. 10
	def maximization(self):
		'''
		M-step of EM algorithm, use scikit.learn's LedoitWolf method to perfom
		covariance matrix shrinkage.
		Arguments:
			sufficient statistics, i.e. model parameters
		Returns:
			the updated sufficient statistics which all in self definition, so no return values
		'''
		logger.info("running maximization function")
		logger.info("mean maximization")
		mu = np.divide(self.mu, self.ndata)
		logger.info("covariance maximization")
		for i in range(self._K):
			for j in range(self._K):
				self.cov[i, j] = (1.0 / self.ndata) * self.cov[i, j] + self.ndata * mu[i] * mu[j] - self.mu[i] * mu[j] - self.mu[j] * mu[i]
		logger.info(" performing covariance shrinkage using sklearn module")
		lw = LedoitWolf(assume_centered=True)
		cov_result = lw.fit(self.cov).covariance_
		self.inv_cov = np.linalg.inv(cov_result)
		self.log_det_inv_cov = math_utli.safe_log(np.linalg.det(self.inv_cov))

		logger.info("topic maximization")
		for i in range(self._K):
			sum_m = 0
			sum_m += np.sum(self.beta, axis=1)[i]  # row sum of topic i over the W vocabulary entries

			if sum_m == 0:
				sum_m = -1000 * self._W
			else:
				sum_m = np.log(sum_m)

			for j in range(self._W):
				# Normalize in log space: log_beta = log(beta) - log(row sum)
				self.log_beta[i, j] = math_utli.safe_log(self.beta[i, j]) - sum_m

		logger.info("write model parameters to file")
		logger.info("write gaussian")
		with open('ctm_nu', 'wb') as ctm_nu_dump:
			cPickle.dump(self.nu, ctm_nu_dump)
		with open('ctm_cov', 'wb') as ctm_cov_dump:
			cPickle.dump(self.cov, ctm_cov_dump)
		with open('ctm_inv_cov', 'wb') as ctm_inv_cov_dump:
			cPickle.dump(self.inv_cov, ctm_inv_cov_dump)
		with open('ctm_log_det_inv_cov', 'wb') as ctm_log_det_inv_cov_dump:
			cPickle.dump(self.log_det_inv_cov, ctm_log_det_inv_cov_dump)
		logger.info("write topic matrix")
		with open('ctm_log_beta', 'wb') as ctm_log_beta_dump:
			cPickle.dump(self.log_beta, ctm_log_beta_dump)
Example no. 11
def max_IR_weight(ic_df,
                  holding_period,
                  rollback_period=120,
                  covariance_type="shrink"):
    """
    输入ic_df(ic值序列矩阵),指定持有期和滚动窗口,给出相应的多因子组合权重
    :param ic_df: ic值序列矩阵 (pd.Dataframe),索引(index)为datetime,columns为各因子名称。
             如:

                       BP	   CFP	   EP	  ILLIQUIDITY	REVS20	   SRMI	   VOL20
            date
            2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832	0.214377	0.068445
            2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890	0.202724	0.081748
            2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691	0.122554	0.042489
            2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805	0.053339	0.079592
            2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902	0.077293	-0.050667

    :param holding_period: 持有周期(int)
    :param rollback_period: 滚动窗口,即计算每一天的因子权重时,使用了之前rollback_period下的IC时间序列来计算IC均值向量和IC协方差矩阵(int)。
    :param covariance_type:"shrink"/"simple" 协防差矩阵估算方式 Ledoit-Wolf压缩估计或简单估计
    :return: weight_df:使用Sample协方差矩阵估算方法得到的因子权重(pd.Dataframe),
             索引(index)为datetime,columns为待合成的因子名称。
    """
    # Maximizing the IC-IR over days t-n .. t uses data up to t+period (for computing returns);
    # the resulting weights are used to weight the factors at t+period
    n = rollback_period
    weight_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
    lw = LedoitWolf()
    for dt in ic_df.index:
        ic_dt = ic_df[ic_df.index <= dt].tail(n)
        if len(ic_dt) < n:
            continue
        if covariance_type == "shrink":
            try:
                ic_cov_mat = lw.fit(ic_dt.as_matrix()).covariance_
            except:
                ic_cov_mat = np.mat(np.cov(ic_dt.T.as_matrix()).astype(float))
        else:
            ic_cov_mat = np.mat(np.cov(ic_dt.T.as_matrix()).astype(float))
        inv_ic_cov_mat = np.linalg.inv(ic_cov_mat)
        weight = inv_ic_cov_mat * np.mat(ic_dt.mean().values).reshape(
            len(inv_ic_cov_mat), 1)
        weight = np.array(weight.reshape(len(weight), ))[0]
        weight_df.ix[dt] = weight / np.sum(np.abs(weight))

    return weight_df.shift(holding_period)
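
The same kind of synthetic sketch for the max-IR variant; with rollback_period=120 the first 119 rows are skipped, so the toy series must be longer than the window:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
dates = pd.date_range('2016-01-01', periods=200)
ic_df = pd.DataFrame(rng.normal(0.05, 0.1, size=(200, 3)),
                     index=dates, columns=['BP', 'EP', 'CFP'])
weight_df = max_IR_weight(ic_df, holding_period=5, rollback_period=120)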
Example no. 12
    def get_ic_weight_shrink_df(self,
                                ic_df,
                                holding_period,
                                rollback_period=120):
        """
        输入ic_df(ic值序列矩阵),指定持有期和滚动窗口,给出相应的多因子组合权重
        :param ic_df: ic值序列矩阵 (pd.Dataframe),索引(index)为datetime,columns为各因子名称。
                 如:

                           BP	   CFP	   EP	  ILLIQUIDITY	REVS20	   SRMI	   VOL20
                date
                2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832	0.214377	0.068445
                2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890	0.202724	0.081748
                2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691	0.122554	0.042489
                2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805	0.053339	0.079592
                2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902	0.077293	-0.050667

        :param holding_period: 持有周期(int)
        :param rollback_period: 滚动窗口,即计算每一天的因子权重时,使用了之前rollback_period下的IC时间序列来计算IC均值向量和IC协方差矩阵(int)。
        :return: ic_weight_shrink_df:使用Ledoit-Wolf压缩方法得到的因子权重(pd.Dataframe),
                 索引(index)为datetime,columns为待合成的因子名称。
        """

        from sklearn.covariance import LedoitWolf
        import numpy as np

        n = rollback_period
        ic_weight_shrink_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
        lw = LedoitWolf()
        for dt in ic_df.index:
            ic_dt = ic_df[ic_df.index < dt].tail(n)
            if len(ic_dt) < n:
                continue
            try:
                ic_cov_mat = lw.fit(ic_dt.values).covariance_
            except Exception:
                ic_cov_mat = np.mat(np.cov(ic_dt.T.values).astype(float))
            inv_ic_cov_mat = np.linalg.inv(ic_cov_mat)
            weight = inv_ic_cov_mat * np.mat(ic_dt.mean()).reshape(len(inv_ic_cov_mat), 1)
            weight = np.array(weight.reshape(len(weight), ))[0]
            ic_weight_shrink_df.loc[dt] = weight / np.sum(weight)

        return ic_weight_shrink_df.shift(holding_period)
Example no. 14
def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
    # Import data from .csv
    df = pd.read_csv(filePath, sep=';')
    df.index = df.date
    df = df.drop('date', axis=1)

    # Subset, if requested via subset=True
    if subset:
        df = df.tail(subsetSize)

    # Estimate covariance using Empirical/MLE
    # Expected input is returns, hence set: assume_centered = True
    mleFitted = empirical_covariance(X=df, assume_centered=True)
    sigma = mleFitted

    if shrinkage:
        # Estimate covariance using LedoitWolf, first create instance of object
        lw = LedoitWolf(assume_centered=True)
        lwFitted = lw.fit(X=df).covariance_
        sigma = lwFitted

    return sigma
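
Illustrative calls; 'returns.csv' is a hypothetical semicolon-separated file with a `date` column plus one return column per asset:

sigma_mle = prepareProblem('returns.csv')                        # empirical/MLE estimate
sigma_lw = prepareProblem('returns.csv', shrinkage=True)         # Ledoit-Wolf estimate
sigma_recent = prepareProblem('returns.csv', subset=True, subsetSize=250)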
Example no. 15
def Caculate_Weight_LW(fct_class_name, fct_class, n, dir, ic_df):
    store_path = dir + '/data_out/class_factor_weight/' + fct_class_name
    isExists = os.path.exists(store_path)
    if not isExists:
        os.makedirs(store_path)
    ic_weight_shrink_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
    e = 1e-10  # a value very close to 0
    lw = LedoitWolf()
    for dt in ic_df.index:
        ic_dt = ic_df[ic_df.index < dt].tail(n)
        if len(ic_dt) < n:
            continue
        ic_cov_mat = lw.fit(ic_dt.values).covariance_
        inv_ic_cov_mat = np.linalg.inv(ic_cov_mat)
        weight = np.matmul(inv_ic_cov_mat, np.mat(ic_dt.mean()).reshape(len(inv_ic_cov_mat), 1))
        #weight = inv_ic_cov_mat * np.mat(ic_dt.mean()).reshape(len(inv_ic_cov_mat), 1)
        weight = np.array(weight.reshape(len(weight), ))[0]
        ic_weight_shrink_df.loc[dt] = weight / np.sum(weight)

        '''IC = np.array(ic_dt.mean()).reshape(4,1)
        fun = lambda W: (-(np.matmul(W.T, IC) / np.sqrt(W.T * ic_cov_mat * W)))[0][0]  # objective function
        cons = ({'type': 'ineq', 'fun': lambda W: W - e})
        W0 = np.random.rand(len(fct_class), 1)
        res = minimize(fun, W0, method='SLSQP', constraints=cons)
        ic_weight_shrink_df.loc[dt] = res.x'''

    ic_weight_shrink_df = ic_weight_shrink_df.dropna(axis=0, how='any')

    color = ['green', 'blue', 'orange', 'gray']
    for fct in fct_class:
        # ic_weight_df[fct]=np.array(ic_weight_df[fct])/np.array(ic_weight_df['Col_sum'])
        plt.plot(ic_weight_shrink_df.index, ic_weight_shrink_df[fct], color=color[fct_class.index(fct)])
    plt.legend()
    plt.title('Factor weight using Ledoit-Wolf covariance')
    plt.savefig(store_path + '/weight_maxIR_LW.png')
    plt.close()

    return ic_weight_shrink_df
Example no. 16
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X,
                                                        assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d, assume_centered=True)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)
Example no. 17
def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49
    n_samples = 200

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    random_state = check_random_state(0)
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        signal = random_state.randn(n_samples, n_features)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    if cov_estimator == EmpiricalCovariance():
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))
Example no. 18
real_cov = toeplitz(r**np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i,j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i,j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i,j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i,j] = oa.shrinkage_

# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
pl.ylabel("Squared error")
Example no. 19
base_X_test = np.random.normal(size=(n_samples, n_features))

# Color samples
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
    log_likelihood, empirical_covariance

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(
        X_train, assume_centered=True).score(X_test, assume_centered=True) \
                         for s in shrinkages]

# getting the likelihood under the real model
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
Example no. 20
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Example no. 21
def shrink(X):
    lw = LedoitWolf(store_precision=False, assume_centered=False)
    lw.fit(X)
    return lw.covariance_
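
A one-liner sketch of the helper above (assuming `from sklearn.covariance import LedoitWolf`, which the excerpt omits):

import numpy as np
from sklearn.covariance import LedoitWolf

X = np.random.randn(100, 5)
cov = shrink(X)
print(cov.shape)  # (5, 5)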
Example no. 22
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True), shrinkage_)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True, block_size=6),
        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert (lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert (lw.precision_ is None)
Example no. 23
except ImportError:
    has_sklearn = False
    print('sklearn not available')


def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr


if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  #.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
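
A quick check of the cov2corr helper above on a hand-built covariance (illustrative numbers):

import numpy as np

cov = np.array([[4.0, 1.2],
                [1.2, 9.0]])
print(cov2corr(cov))  # [[1.0, 0.2], [0.2, 1.0]]; 0.2 = 1.2 / (2 * 3)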
Example no. 25
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True), shrinkage_)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True, block_size=6),
        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert (lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()

    warn_msg = (
        "Only one sample available. You may want to reshape your data array")
    with pytest.warns(UserWarning, match=warn_msg):
        lw.fit(X_1sample)

    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert (lw.precision_ is None)
Example no. 27
real_cov = toeplitz(r ** np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')
plt.ylabel("Squared error")
Example no. 28
def lda_train_scaled(fv, shrink=False):
    """Train the LDA classifier.

    Parameters
    ----------
    fv : ``Data`` object
        the feature vector must have 2 dimensional data, the first
        dimension being the class axis. The unique class labels must be
        0 and 1 otherwise a ``ValueError`` will be raised.
    shrink : Boolean, optional
        use shrinkage

    Returns
    -------
    w : 1d array
    b : float

    Raises
    ------
    ValueError : if the class labels are not exactly 0s and 1s

    Examples
    --------

    >>> clf = lda_train(fv_train)
    >>> out = lda_apply(fv_test, clf)

    See Also
    --------
    lda_apply

    """
    assert shrink is True
    x = fv.data
    y = fv.axes[0]
    if len(np.unique(y)) != 2:
        raise ValueError(
            'Should only have two unique class labels, instead got'
            ': {labels}'.format(labels=np.unique(y)))
    # Use sorted labels
    labels = np.sort(np.unique(y))
    mu1 = np.mean(x[y == labels[0]], axis=0)
    mu2 = np.mean(x[y == labels[1]], axis=0)
    # x' = x - m
    m = np.empty(x.shape)
    m[y == labels[0]] = mu1
    m[y == labels[1]] = mu2
    x2 = x - m
    # w = cov(x)^-1(mu2 - mu1)
    if shrink:
        estimator = LW()
        covm = estimator.fit(x2).covariance_
    else:
        covm = np.cov(x2.T)
    w = np.dot(np.linalg.pinv(covm), (mu2 - mu1))

    #  From matlab bbci toolbox:
    # https://github.com/bbci/bbci_public/blob/fe6caeb549fdc864a5accf76ce71dd2a926ff12b/classification/train_RLDAshrink.m#L133-L134
    #C.w= C.w/(C.w'*diff(C_mean, 1, 2))*2;
    #C.b= -C.w' * mean(C_mean,2);
    w = (w / np.dot(w.T, (mu2 - mu1))) * 2
    b = np.dot(-w.T, np.mean((mu1, mu2), axis=0))
    assert not np.any(np.isnan(w))
    assert not np.isnan(b)
    return w, b
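
A sketch of calling the trainer above; `SimpleNamespace` stands in for the wyrm-style ``Data`` object (only ``.data`` and ``.axes[0]`` are used), and `LW` is assumed to be `sklearn.covariance.LedoitWolf` as the excerpt implies:

import numpy as np
from types import SimpleNamespace
from sklearn.covariance import LedoitWolf as LW

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(1, 1, size=(50, 4)),    # class 1
               rng.normal(-1, 1, size=(50, 4))])  # class 0
y = np.repeat([1, 0], 50)
fv = SimpleNamespace(data=X, axes=[y])

w, b = lda_train_scaled(fv, shrink=True)  # shrink must be True (see the assert)
print(w.shape, b)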
Example no. 30
def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        n_samples = 200 + k
        signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
                                        length=n_samples, same_variance=False)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["covariance", "correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert isinstance(connectivities, np.ndarray)
            assert len(connectivities) == len(covs)

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    if cov_estimator == EmpiricalCovariance():
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))

    # Check the mean_
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        conn_measure.fit_transform(signals)
        assert (conn_measure.mean_).shape == (n_features, n_features)
        if kind != 'tangent':
            assert_array_almost_equal(
                conn_measure.mean_,
                np.mean(conn_measure.transform(signals), axis=0))

    # Check that the mean isn't modified in transform
    conn_measure = ConnectivityMeasure(kind='covariance')
    conn_measure.fit(signals[:1])
    mean = conn_measure.mean_
    conn_measure.transform(signals[1:])
    assert_array_equal(mean, conn_measure.mean_)

    # Check vectorization option
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(vectorized_connectivities,
                                  sym_matrix_to_vec(connectivities))

    # Check not fitted error
    with pytest.raises(ValueError, match='has not been fitted. '):
        ConnectivityMeasure().inverse_transform(vectorized_connectivities)

    # Check inverse transformation
    kinds.remove('tangent')
    for kind in kinds:
        # without vectorization: input matrices are returned with no change
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(connectivities), connectivities)

        # with vectorization: input vectors are reshaped into matrices
        # if diagonal has not been discarded
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    # with vectorization if diagonal has been discarded
    for kind in ['correlation', 'partial correlation']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    for kind in ['covariance', 'precision']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        diagonal = np.array([np.diagonal(conn) / sqrt(2) for conn in
                             connectivities])
        inverse_transformed = conn_measure.inverse_transform(
            vectorized_connectivities, diagonal=diagonal)
        assert_array_almost_equal(inverse_transformed, connectivities)
        with pytest.raises(ValueError,
                           match='can not reconstruct connectivity matrices'):
            conn_measure.inverse_transform(vectorized_connectivities)

    # for 'tangent' kind, covariance matrices are reconstructed
    # without vectorization
    tangent_measure = ConnectivityMeasure(kind='tangent')
    displacements = tangent_measure.fit_transform(signals)
    covariances = ConnectivityMeasure(kind='covariance').fit_transform(
        signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(displacements), covariances)

    # with vectorization
    # when diagonal has not been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(vectorized_displacements),
        covariances)

    # when diagonal has been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True,
                                          discard_diagonal=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    diagonal = np.array([np.diagonal(matrix) / sqrt(2) for matrix in
                         displacements])
    inverse_transformed = tangent_measure.inverse_transform(
        vectorized_displacements, diagonal=diagonal)
    assert_array_almost_equal(inverse_transformed, covariances)
    with pytest.raises(ValueError,
                       match='can not reconstruct connectivity matrices'):
        tangent_measure.inverse_transform(vectorized_displacements)
Example no. 31
    # different covariance matrix predictions
    cov_sample = predict_cov_sample(returns_sample)
    cor_sample = predict_cov_sample(returns_sample, True)

    cov_upper = cov_sample[np.triu_indices(cov_sample.shape[0], k=1)]
    cor_upper = cor_sample[np.triu_indices(cor_sample.shape[0], k=1)]
    sample_mean_cov = cov_upper.mean()
    sample_mean_cor = cor_upper.mean()
    sample_mean_var = np.diagonal(cov_sample).mean()

    if model_train_sample == "whole":
        if which_data == "both":
            if predict_corr:
                LW = LedoitWolf()
                cov_lw = LW.fit(returns_sample).covariance_
                cov_model = predict_cov_matrix_both(
                    lr, scaler, features_out_of_sample_reports,
                    features_out_of_sample_industry, sample_mean_cor,
                    standardize_cov_matrix, cov_lw)
            else:
                cov_model = predict_cov_matrix_both(
                    lr, scaler, features_out_of_sample_reports,
                    features_out_of_sample_industry, sample_mean_cov,
                    standardize_cov_matrix, cov_sample, False)

        else:
            if predict_corr:
                LW = LedoitWolf()
                cov_lw = LW.fit(returns_sample).covariance_
                cov_model = predict_correlation_matrix_model(
Example No. 32
0
def plot_psds(psd_file, data_dir='/auto/tdrive/mschachter/data'):

    # read PairwiseCF file
    pcf_file = os.path.join(data_dir, 'aggregate', 'pairwise_cf.h5')
    pcf = AggregatePairwiseCF.load(pcf_file)
    # pcf.zscore_within_site()

    g = pcf.df.groupby(['bird', 'block', 'segment', 'electrode'])
    nsamps_electrodes = len(g)

    i = pcf.df.cell_index != -1
    g = pcf.df[i].groupby(['bird', 'block', 'segment', 'electrode', 'cell_index'])
    nsamps_cells = len(g)

    print('# of electrodes: %d' % nsamps_electrodes)
    print('# of cells: %d' % nsamps_cells)
    print('# of lfp samples: %d' % pcf.lfp_psds.shape[0])
    print('# of spike psd samples: %d' % pcf.spike_psds.shape[0])

    # compute the LFP mean and std
    lfp_psds = deepcopy(pcf.lfp_psds)
    print('lfp_psds (raw): max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99)))
    log_transform(lfp_psds)
    print('lfp_psds (log): max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99)))
    nz = lfp_psds.sum(axis=1) > 0
    lfp_psds = lfp_psds[nz, :]
    lfp_psd_mean = lfp_psds.mean(axis=0)
    lfp_psd_std = lfp_psds.std(axis=0, ddof=1)
    nsamps_lfp = lfp_psds.shape[0]

    # get the spike rate
    spike_rate = pcf.df.spike_rate.values
    # plt.figure()
    # plt.hist(spike_rate, bins=20, color='g', alpha=0.7)
    # plt.title('Spike Rate Histogram, q1=%0.3f, q5=%0.3f, q10=%0.3f, q50=%0.3f, q99=%0.3f' %
    #           (np.percentile(spike_rate, 1), np.percentile(spike_rate, 5), np.percentile(spike_rate, 10),
    #           np.percentile(spike_rate, 50), np.percentile(spike_rate, 99)))
    # plt.show()

    # compute the covariance
    lfp_psd_z = deepcopy(lfp_psds)
    lfp_psd_z -= lfp_psd_mean
    lfp_psd_z /= lfp_psd_std
    lfp_and_spike_cov_est = LedoitWolf()
    lfp_and_spike_cov_est.fit(lfp_psd_z)
    lfp_and_spike_cov = lfp_and_spike_cov_est.covariance_

    """
    # read CRCNS file
    cell_data = dict()
    hf = h5py.File(psd_file, 'r')
    cnames = hf.attrs['col_names']
    for c in cnames:
        cell_data[c] = np.array(hf[c])
    crcns_psds = np.array(hf['psds'])
    freqs = hf.attrs['freqs']
    hf.close()

    cell_df = pd.DataFrame(cell_data)
    print('regions =', cell_df.superregion.unique())

    name_map = {'brainstem':'MLd', 'thalamus':'OV', 'cortex':'Field L+CM'}
    """

    # resample the lfp mean and std
    freq_rs = np.linspace(pcf.freqs.min(), pcf.freqs.max(), 1000)
    
    lfp_mean_cs = interp1d(pcf.freqs, lfp_psd_mean, kind='cubic')
    lfp_mean_rs = lfp_mean_cs(freq_rs)
    
    lfp_std_cs = interp1d(pcf.freqs, lfp_psd_std, kind='cubic')
    lfp_std_rs = lfp_std_cs(freq_rs)

    # concatenate the lfp psd and log spike rate
    lfp_psd_and_spike_rate = list()
    for li, si in zip(pcf.df['lfp_index'], pcf.df['spike_index']):
        lpsd = pcf.lfp_psds[li, :]
        srate, sstd = pcf.spike_rates[si, :]
        if srate > 0:
            lfp_psd_and_spike_rate.append(np.hstack([lpsd, np.log(srate)]))
    lfp_psd_and_spike_rate = np.array(lfp_psd_and_spike_rate)

    nfreqs = len(pcf.freqs)
    lfp_rate_cc = np.zeros([nfreqs])
    for k in range(nfreqs):
        lfp_rate_cc[k] = np.corrcoef(lfp_psd_and_spike_rate[:, k], lfp_psd_and_spike_rate[:, -1])[0, 1]

    fig = plt.figure(figsize=(24, 12))
    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.30, hspace=0.30)

    nrows = 2
    ncols = 100
    gs = plt.GridSpec(nrows, ncols)

    ax = plt.subplot(gs[0, :35])
    plt.errorbar(freq_rs, lfp_mean_rs, yerr=lfp_std_rs, c='k', linewidth=9.0, elinewidth=3.0,
                 ecolor='#D8D8D8', alpha=0.5, capthick=0.)
    plt.axis('tight')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Power (dB)')
    # plt.ylim(0, 1)
    plt.title('Mean LFP PSD')

    ax = plt.subplot(gs[1, :35])
    plt.plot(pcf.freqs, lfp_rate_cc, '-', c=COLOR_BLUE_LFP, linewidth=9.0, alpha=0.7)
    plt.axhline(0, c='k')
    plt.axis('tight')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Correlation Coefficient')
    plt.ylim(-0.05, 0.25)
    plt.title('LFP Power vs log Spike Rate')

    """
    fi = freqs < 200
    ax = plt.subplot(gs[1, :35])
    clrs = ['k', '#d60036', COLOR_YELLOW_SPIKE]
    alphas = [0.8, 0.8, 0.6]
    for k,reg in enumerate(['brainstem', 'thalamus', 'cortex']):

        i = cell_df.superregion == reg
        indices = cell_df['index'][i].values
        psds = crcns_psds[indices, :]
        log_psds = deepcopy(psds)
        log_transform(log_psds)

        # compute the mean and sd of the power spectra
        psd_mean = log_psds.mean(axis=0)
        psd_std = log_psds.std(axis=0, ddof=1)
        psd_cv = psd_std / psd_mean

        # plot the mean power spectrum on the left
        plt.plot(freqs[fi], psd_mean[fi], c=clrs[k], linewidth=9.0, alpha=alphas[k])
        plt.ylabel('Power (dB)')
        plt.xlabel('Frequency (Hz)')
        plt.axis('tight')
        plt.ylim(0, 1.0)
    plt.legend(['MLd', 'OV', 'Field L+CM'], fontsize='x-small', loc='upper right')
    plt.title('Mean PSTH PSDs (CRCNS Data)')
    """

    ax = plt.subplot(gs[:, 40:])
    plt.imshow(lfp_and_spike_cov, aspect='auto', interpolation='nearest', origin='lower', cmap=magma, vmin=0, vmax=1)
    plt.colorbar(label='Correlation Coefficient')
    xy = np.arange(len(pcf.freqs))
    lbls = ['%d' % f for f in pcf.freqs]
    plt.xticks(xy, lbls, rotation=0)
    plt.yticks(xy, lbls)
    plt.axhline(nfreqs-0.5, c='w')
    plt.axvline(nfreqs-0.5, c='w')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Frequency (Hz)')
    plt.title('LFP PSD Correlation Matrix')

    fname = os.path.join(get_this_dir(), 'crcns_data.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()
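
# Because lfp_psd_z is z-scored column-wise, the Ledoit-Wolf covariance fitted
# above behaves like a shrunk correlation matrix (which is why it is plotted
# with vmin=0, vmax=1 and a "Correlation Coefficient" colorbar). A quick
# standalone check of that effect on synthetic data:
import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.default_rng(0)
x = rng.standard_normal((500, 6)).cumsum(axis=0)      # correlated columns
z = (x - x.mean(axis=0)) / x.std(axis=0, ddof=1)      # z-score per column
shrunk_corr = LedoitWolf().fit(z).covariance_
print(np.round(np.diag(shrunk_corr), 3))              # all close to 1
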
import mne
import numpy as np
from pathlib import Path
from sklearn.covariance import LedoitWolf

from camcan.preprocessing import extract_connectivity
from camcan.processing import map_tangent

sample_data = Path(mne.datasets.sample.data_path())
fname = sample_data / Path('MEG/sample/sample_audvis_raw.fif')

raw = mne.io.read_raw_fif(str(fname), preload=True)

tmin = 0
tmax = 2
baseline = None

events = mne.find_events(raw)[:10]
raw.pick_types(meg='mag', eeg=False)
epochs = mne.Epochs(raw=raw, tmin=tmin, tmax=tmax, events=events,
                    baseline=baseline, decim=5)
timeseries = epochs.get_data()

connectivity_tangent = extract_connectivity(timeseries, kind='tangent')

cov_estimator = LedoitWolf(store_precision=False)
connectivities = [cov_estimator.fit(x).covariance_ for x in timeseries]

connectivity_tangent2 = map_tangent(connectivities, diag=False)

np.testing.assert_array_equal(connectivity_tangent, connectivity_tangent2)
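
# map_tangent above projects SPD covariance matrices into a common tangent
# space. A minimal sketch of the underlying operation, using a Euclidean mean
# as the reference point (the camcan/nilearn code uses a geometric mean, so
# this is illustrative rather than numerically equivalent):
import numpy as np


def _logm_spd(c):
    # matrix logarithm of a symmetric positive-definite matrix via eigh
    vals, vecs = np.linalg.eigh(c)
    return vecs @ np.diag(np.log(vals)) @ vecs.T


def tangent_embed(covs):
    ref = np.mean(covs, axis=0)                        # reference SPD matrix
    vals, vecs = np.linalg.eigh(ref)
    whitener = vecs @ np.diag(vals ** -0.5) @ vecs.T   # ref^(-1/2)
    # logm(ref^(-1/2) C ref^(-1/2)) is the tangent-space image of C
    return [_logm_spd(whitener @ c @ whitener) for c in covs]


rng = np.random.default_rng(0)
x = rng.standard_normal((3, 200, 5))                   # 3 "subjects", 5 channels
displacements = tangent_embed([np.cov(xi, rowvar=False) for xi in x])
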
c_des_out = np.logical_not(label[:, 2] == b'des')
tmp_out = np.logical_and(c_des_out, mask_block)
c_rest_out = np.logical_not(label[:, 0] == b'rest')
cond_out = np.logical_and(tmp_out, c_rest_out)
y = label[cond_out, 2]
labels = np.unique(y)
# Prepare the correlation estimators
estimator = LedoitWolf()
scaler = StandardScaler()
# Create the output array
result_matrix = np.empty([len(names), motor_region.shape[0], labels.shape[0], labels.shape[0]])

# Analysis for each subject
for i, n in enumerate(sorted(names)):
    roi_name = fold_g + 'mni4060/asymroi_' + smt + '_' + n + '.npz'
    roi = np.load(roi_name)['roi'][cond_out]
    roi = roi[:, motor_region - 1]
    for j in range(motor_region.shape[0]):
        roi_j = roi[:, j]
        roi_mat = np.zeros(((y == b'imp').sum(), len(labels)))
        for z, lab in enumerate(sorted(labels)):
            roi_mat[:, z] = roi_j[y == lab]
        roi_sc = scaler.fit_transform(roi_mat)
        estimator.fit(roi_sc)
        matrix = estimator.covariance_
        result_matrix[i, j] = 1 - matrix

np.savez_compressed('F:/IRM_Marche/dismatrix.npz', result_matrix)


Example No. 35
0
    def get_weight(self, date, IC_length, period, weight_way, halflife=0):
        IC_use_all = self.IC_all.loc[:date,
                                     self.factor_list].iloc[-IC_length -
                                                            period:-period]
        IC_use = copy.deepcopy(IC_use_all)

        loc = []
        for temp, f in enumerate(self.factor_list):
            # Drop factors whose IC series has too few valid (non-NaN)
            # values: fewer than 20% of the window
            if Counter(np.isnan(IC_use[f]))[0] < IC_use.shape[0] * 0.2:
                loc.append(temp)
                IC_use = IC_use.drop(columns=f)

        ind_valid = np.where(~np.isnan(
            IC_use.sum(axis=1, skipna=False).values))[0]  # rows where every remaining factor has an IC value
        IC_use = IC_use.iloc[ind_valid]
        IC_mean = IC_use.mean(axis=0).values.reshape(IC_use.shape[1], 1)
        if weight_way == 'ICIR_Ledoit':
            lw = LedoitWolf()
            IC_sig = lw.fit(IC_use.values).covariance_
            weight = np.dot(np.linalg.inv(IC_sig), IC_mean)

        elif weight_way == 'ICIR_sigma':
            IC_sig = np.cov(IC_use.values, rowvar=False)
            weight = np.dot(np.linalg.inv(IC_sig), IC_mean)

        elif weight_way == 'ICIR':
            IC_sig = (IC_use.std(axis=0)).values.reshape(IC_use.shape[1], 1)
            weight = IC_mean / IC_sig

        elif weight_way == 'IC_halflife':
            if halflife > 0:
                lam = pow(1 / 2, 1 / halflife)
            else:
                lam = 1
            len_IC = IC_use.shape[0]
            w = np.array([pow(lam, len_IC - 1 - i) for i in range(len_IC)])
            w = w / sum(w)
            weight = IC_use.mul(pd.Series(data=w, index=IC_use.index),
                                axis=0).sum(axis=0).values

        elif weight_way == 'ICIR_halflife':
            if halflife > 0:
                lam = pow(1 / 2, 1 / halflife)
            else:
                lam = 1
            len_IC = IC_use.shape[0]
            w = np.array([pow(lam, len_IC - 1 - i) for i in range(len_IC)])
            w = w / sum(w)
            ic_mean = IC_use.mul(pd.Series(data=w, index=IC_use.index),
                                 axis=0).sum(axis=0)
            ic_std = np.sqrt((np.power(IC_use - ic_mean,
                                       2)).mul(pd.Series(data=w,
                                                         index=IC_use.index),
                                               axis=0).sum(axis=0))
            weight = ic_mean.values / ic_std.values

        elif weight_way == 'equal':
            weight = np.sign(IC_mean)

        w = np.array([np.nan] * len(self.factor_list))
        flag = 0
        for i in range(len(self.factor_list)):
            if i not in loc:
                w[i] = weight[flag]
                flag += 1
            else:
                w[i] = 0.0  # too few valid IC values, so the factor gets zero weight
        weight = pd.Series(w, index=self.factor_list)

        return weight
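
# The 'ICIR_Ledoit' branch above maximizes the information ratio of the
# combined factor by weighting with (shrunk IC covariance)^-1 times the mean
# IC. A standalone sketch of just that weighting step, on synthetic IC data
# (the inputs here are made up; only the weighting logic mirrors the method):
import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.default_rng(0)
ic = rng.normal(0.02, 0.1, size=(120, 4))      # 120 periods x 4 factors of ICs

ic_mean = ic.mean(axis=0)
ic_cov = LedoitWolf().fit(ic).covariance_
weight = np.linalg.solve(ic_cov, ic_mean)      # Sigma^-1 mu, no explicit inverse
weight /= np.abs(weight).sum()                 # normalize to unit L1 (optional)
print(weight)
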
Example No. 36
0
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    with warnings.catch_warnings(record=True):
        lw.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Example No. 37
0
    def threshold_from_simulations(self, X, precision=2000, verbose=False,
                                   n_jobs=-1):
        """
        """
        import multiprocessing as mp
        if n_jobs < 1:
            n_jobs = mp.cpu_count()
        n_samples, n_features = X.shape
        n = n_samples
        p = n_features
        h = self.support_.sum()
        lw = LedoitWolf()
        ref_covariance = lw.fit(X[self.support_]).covariance_
        c = sp.stats.chi2(p + 2).cdf(
            sp.stats.chi2(p).ppf(float(h) / n)) / (float(h) / n)
        sigma_root = np.linalg.cholesky(ref_covariance / c)
        all_h = []

        # inliers distribution
        dist_in = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i // 4) == 0):
                print("\t", 50 * i / float(max_i), "%")
            #sigma_root = np.diag(np.sqrt(eigenvalues))
            #sigma_root = np.eye(n_features)
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(
                X1)
            X2 = X1 - clf.location_
            dist_in = np.concatenate(
                (dist_in, clf.decision_function(
                        X2[clf.support_], raw_values=True)))
            all_h.append(clf.h)

        # outliers distribution
        dist_out = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(n_samples - self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i // 4) == 0):
                print("\t", 50 * (1. + i / float(max_i)), "%")
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(X1)
            X2 = X1 - clf.location_
            dist_out = np.concatenate(
                (dist_out, clf.decision_function(
                        X2[~clf.support_], raw_values=True)))
            all_h.append(clf.h)
        self.dist_in = np.sort(dist_in)
        self.dist_out = np.sort(dist_out)
        self.h_mean = np.mean(all_h)

        return self.dist_out
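
# The constant c above appears to be the standard consistency factor for a
# truncated Gaussian: the covariance of the best-fitting h/n fraction of the
# data underestimates the true covariance, and dividing by
# c = P(chi2_{p+2} <= q) / (h/n), with q the h/n quantile of chi2_p,
# corrects for that truncation. A standalone check:
import scipy.stats as st


def consistency_factor(h_frac, p):
    q = st.chi2(p).ppf(h_frac)
    return st.chi2(p + 2).cdf(q) / h_frac


print(consistency_factor(0.75, 10))   # < 1, so dividing inflates the estimate
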
Example No. 38
0
class DCS_kd(BaseEstimator):

    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = float(k)
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' % covariance_estimator)

        self.x0 = None
        self.x1 = None

    def fit(self, x, y):
        self.x0 = x[y == min(y)]
        self.x1 = x[y == max(y)]

    def __str__(self):
        return 'Analytical Cauchy-Schwarz Divergence in {}-d'.format(self.k)

    def value(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ipx0 = self._ipx(self.x0, self.x0, v)
        ipx1 = self._ipx(self.x1, self.x1, v)
        ipx2 = self._ipx(self.x0, self.x1, v)

        return np.log(ipx0) + np.log(ipx1) - 2 * np.log(ipx2)

    def derivative(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ret = (self._d_ipx(self.x0, self.x0, v) / self._ipx(self.x0, self.x0, v)
               + self._d_ipx(self.x1, self.x1, v) / self._ipx(self.x1, self.x1, v)
               - 2 * self._d_ipx(self.x0, self.x1, v) / self._ipx(self.x0, self.x1, v))

        return ret.reshape(-1)

    def _H(self, X0, X1):
        n = (4.0 / (self.k + 2)) ** (2.0 / (self.k + 4))
        p = (-2.0 / (self.k + 4))
        return n * (X0.shape[0] ** p * self.cov.fit(X0).covariance_ + X1.shape[0] ** p * self.cov.fit(X1).covariance_)

    def _f1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        # return 1.0 / np.sqrt(la.det(vHv))
        return 1.0 / (X0.shape[0] * X1.shape[0] * np.sqrt(la.det(vHv)) * (2 * np.pi) ** (self.k / 2))

    def _g1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        return - self._f1(X0, X1, v) * Hxy.dot(v).dot(la.inv(vHv))

    def _f2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        ret = 0.0
        for i in range(X0.shape[0]):
            ret += np.exp(-0.5 * ((vx0c[i] - vx1c) * (vx0[i] - vx1)).sum(axis=1)).sum()
        return ret

    def _g2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)  # k x k

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        eye = np.eye(v.shape[0])
        right_expr = (eye - Hxy.dot(v).dot(vHv_inv).dot(v.T))  # d x d

        d = v.shape[0]
        k = int(self.k)
        ret = 0.0
        for i in range(X0.shape[0]):
            f2_vals = np.exp(-0.5 * ((vx0c[i] - vx1c) * (vx0[i] - vx1)).sum(axis=1)).reshape(-1, 1)
            ws = (X0[i] - X1).reshape(X1.shape[0], d, 1)
            vxdiffs = (- f2_vals * (vx0[i] - vx1)).reshape(X1.shape[0], 1, k)
            ret += np.tensordot(ws, vxdiffs, ([0, 2], [0, 1]))

        return right_expr.dot(ret).dot(vHv_inv)

    def _ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._f2(X0, X1, v)

    def _d_ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._g2(X0, X1, v) + self._f2(X0, X1, v) * self._g1(X0, X1, v)
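
# The _H method above appears to apply Silverman's rule-of-thumb bandwidth for
# a k-dimensional Gaussian kernel to each class covariance:
#     H_X = (4 / (k + 2))**(2 / (k + 4)) * n**(-2 / (k + 4)) * Cov(X).
# A quick standalone look at that scaling factor:
def silverman_factor(n_samples, k):
    return (4.0 / (k + 2)) ** (2.0 / (k + 4)) * n_samples ** (-2.0 / (k + 4))


print(silverman_factor(100, 2), silverman_factor(10000, 2))  # shrinks with n
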
Example No. 39
0
    random.seed(42)
    print("Gathering examples...")
    # Use subsample of 200K for k-means and covariance estimates
    for i in random.sample(range(0, unlab_X.shape[2]), 200000):
        patches = view_as_windows(unlab_X[:, :, i], (w, w), step=s)
        re_shaped = numpy.reshape(patches,
                                  (patches.shape[0] * patches.shape[0], w * w))
        # normalize the patches, per sample
        re_shaped = preprocessing.scale(re_shaped, axis=1)
        X_unlab_patches.append(re_shaped)
    X_unlab_patches = numpy.vstack(X_unlab_patches)

    # build whitening transform matrix
    print("Fitting ZCA Whitening Transform...")
    cov = LedoitWolf()
    cov.fit(X_unlab_patches)  # fit covariance estimate
    D, U = numpy.linalg.eigh(cov.covariance_)
    V = numpy.sqrt(numpy.linalg.inv(numpy.diag(D + zca_eps)))
    Wh = numpy.dot(numpy.dot(U, V), U.T)
    mu = numpy.mean(X_unlab_patches, axis=0)
    X_unlab_patches = numpy.dot(X_unlab_patches - mu, Wh)

    # run k-means on unlabelled data
    print("Starting k-means...")
    clustr = sklearn.cluster.MiniBatchKMeans(n_clusters=n_clust,
                                             compute_labels=False,
                                             batch_size=300)
    k_means = clustr.fit(X_unlab_patches)

    def f_unsup(img):
        img_ptchs = view_as_windows(img, (w, w), step=s)
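
# Self-contained sketch of the whitening step above: build the ZCA transform
# W = U diag(1/sqrt(d + eps)) U^T from a Ledoit-Wolf covariance estimate and
# check that the whitened data are roughly decorrelated (eps stands in for
# the zca_eps used above; the data here are synthetic):
import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.default_rng(0)
x = rng.standard_normal((5000, 16)) @ rng.standard_normal((16, 16))
eps = 1e-2

cov = LedoitWolf().fit(x).covariance_
d, u = np.linalg.eigh(cov)
w = u @ np.diag(1.0 / np.sqrt(d + eps)) @ u.T        # ZCA whitening matrix
x_white = (x - x.mean(axis=0)) @ w

print(np.round(np.cov(x_white, rowvar=False)[:3, :3], 2))  # ~ identity block
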
Example No. 40
0
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X)
    assert_almost_equal(lw.score(X), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)
Example No. 42
0
real_cov = toeplitz(r**np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(
    n_samples_range,
    lw_mse.mean(1),
    yerr=lw_mse.std(1),
    label="Ledoit-Wolf",
Example No. 43
0
class DCS_kd(BaseEstimator):
    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = float(k)
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' %
                                      covariance_estimator)

        self.x0 = None
        self.x1 = None

    def fit(self, x, y):
        self.x0 = x[y == min(y)]
        self.x1 = x[y == max(y)]

    def __str__(self):
        return 'Analytical Cauchy-Schwarz Divergence in {}-d'.format(self.k)

    def value(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ipx0 = self._ipx(self.x0, self.x0, v)
        ipx1 = self._ipx(self.x1, self.x1, v)
        ipx2 = self._ipx(self.x0, self.x1, v)

        return np.log(ipx0) + np.log(ipx1) - 2 * np.log(ipx2)

    def derivative(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ret = (
            self._d_ipx(self.x0, self.x0, v) / self._ipx(self.x0, self.x0, v) +
            self._d_ipx(self.x1, self.x1, v) / self._ipx(self.x1, self.x1, v) -
            2 * self._d_ipx(self.x0, self.x1, v) /
            self._ipx(self.x0, self.x1, v))

        return ret.reshape(-1)

    def _H(self, X0, X1):
        n = (4.0 / (self.k + 2))**(2.0 / (self.k + 4))
        p = (-2.0 / (self.k + 4))
        return n * (X0.shape[0]**p * self.cov.fit(X0).covariance_ +
                    X1.shape[0]**p * self.cov.fit(X1).covariance_)

    def _f1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        # return 1.0 / np.sqrt(la.det(vHv))
        return 1.0 / (X0.shape[0] * X1.shape[0] * np.sqrt(la.det(vHv)) *
                      (2 * np.pi)**(self.k / 2))

    def _g1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        return -self._f1(X0, X1, v) * Hxy.dot(v).dot(la.inv(vHv))

    def _f2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        ret = 0.0
        for i in range(X0.shape[0]):
            ret += np.exp(-0.5 * ((vx0c[i] - vx1c) *
                                  (vx0[i] - vx1)).sum(axis=1)).sum()
        return ret

    def _g2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)  # k x k

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        eye = np.eye(v.shape[0])
        right_expr = (eye - Hxy.dot(v).dot(vHv_inv).dot(v.T))  # d x d

        d = v.shape[0]
        k = int(self.k)
        ret = 0.0
        for i in range(X0.shape[0]):
            f2_vals = np.exp(-0.5 * ((vx0c[i] - vx1c) *
                                     (vx0[i] - vx1)).sum(axis=1)).reshape(
                                         -1, 1)
            ws = (X0[i] - X1).reshape(X1.shape[0], d, 1)
            vxdiffs = (-f2_vals * (vx0[i] - vx1)).reshape(X1.shape[0], 1, k)
            ret += np.tensordot(ws, vxdiffs, ([0, 2], [0, 1]))

        return right_expr.dot(ret).dot(vHv_inv)

    def _ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._f2(X0, X1, v)

    def _d_ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._g2(X0, X1, v) + self._f2(
            X0, X1, v) * self._g1(X0, X1, v)
def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        n_samples = 200 + k
        signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
                                        length=n_samples, same_variance=False)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["covariance", "correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    if type(cov_estimator) is EmpiricalCovariance:  # exact type: LedoitWolf subclasses it
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))

    # Check the mean_
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        conn_measure.fit_transform(signals)
        assert_equal((conn_measure.mean_).shape, (n_features, n_features))
        if kind != 'tangent':
            assert_array_almost_equal(
                conn_measure.mean_,
                np.mean(conn_measure.transform(signals), axis=0))

    # Check that the mean isn't modified in transform
    conn_measure = ConnectivityMeasure(kind='covariance')
    conn_measure.fit(signals[:1])
    mean = conn_measure.mean_
    conn_measure.transform(signals[1:])
    assert_array_equal(mean, conn_measure.mean_)

    # Check vectorization option
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(vectorized_connectivities,
                                  sym_matrix_to_vec(connectivities))

    # Check not fitted error
    assert_raises_regex(
        ValueError, 'has not been fitted. ',
        ConnectivityMeasure().inverse_transform,
        vectorized_connectivities)

    # Check inverse transformation
    kinds.remove('tangent')
    for kind in kinds:
        # without vectorization: input matrices are returned with no change
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(connectivities), connectivities)

        # with vectorization: input vectors are reshaped into matrices
        # if diagonal has not been discarded
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    # with vectorization if diagonal has been discarded
    for kind in ['correlation', 'partial correlation']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    for kind in ['covariance', 'precision']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        diagonal = np.array([np.diagonal(conn) / sqrt(2) for conn in
                             connectivities])
        inverse_transformed = conn_measure.inverse_transform(
            vectorized_connectivities, diagonal=diagonal)
        assert_array_almost_equal(inverse_transformed, connectivities)
        assert_raises_regex(ValueError,
                            'can not reconstruct connectivity matrices',
                            conn_measure.inverse_transform,
                            vectorized_connectivities)

    # for 'tangent' kind, covariance matrices are reconstructed
    # without vectorization
    tangent_measure = ConnectivityMeasure(kind='tangent')
    displacements = tangent_measure.fit_transform(signals)
    covariances = ConnectivityMeasure(kind='covariance').fit_transform(
        signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(displacements), covariances)

    # with vectorization
    # when diagonal has not been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(vectorized_displacements),
        covariances)

    # when diagonal has been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True,
                                          discard_diagonal=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    diagonal = np.array([np.diagonal(matrix) / sqrt(2) for matrix in
                         displacements])
    inverse_transformed = tangent_measure.inverse_transform(
        vectorized_displacements, diagonal=diagonal)
    assert_array_almost_equal(inverse_transformed, covariances)
    assert_raises_regex(ValueError,
                        'can not reconstruct connectivity matrices',
                        tangent_measure.inverse_transform,
                        vectorized_displacements)
Example No. 45
0
time_series = masker.fit_transform(func_filename,
                                   confounds=[confound_filename])

##########################################################################
# Display time series
import matplotlib.pyplot as plt
for time_serie, label in zip(time_series.T, labels):
    plt.plot(time_serie, label=label)

plt.title('Default Mode Network Time Series')
plt.xlabel('Scan number')
plt.ylabel('Normalized signal')
plt.legend()
plt.tight_layout()


##########################################################################
# Compute precision matrices
from sklearn.covariance import LedoitWolf
cve = LedoitWolf()
cve.fit(time_series)


##########################################################################
# Display connectome
from nilearn import plotting

plotting.plot_connectome(cve.precision_, dmn_coords,
                         title="Default Mode Network Connectivity")
plotting.show()
Example No. 46
0
    # Perform Factor analysis
    fa = FactorAnalysis(n_components=64, random_state=1000)
    fah = FactorAnalysis(n_components=64, random_state=1000)

    Xfa = fa.fit_transform(X)
    Xfah = fah.fit_transform(Xh)

    print('Factor analysis score X: {}'.format(fa.score(X)))
    print('Factor analysis score Xh: {}'.format(fah.score(Xh)))

    # Perform Ledoit-Wolf shrinkage
    ldw = LedoitWolf()
    ldwh = LedoitWolf()

    ldw.fit(X)
    ldwh.fit(Xh)

    print('Ledoit-Wolf score X: {}'.format(ldw.score(X)))
    print('Ledoit-Wolf score Xh: {}'.format(ldwh.score(Xh)))

    # Show the components
    fig, ax = plt.subplots(8, 8, figsize=(10, 10))

    for i in range(8):
        for j in range(8):
            ax[i, j].imshow(fah.components_[(i * 8) + j].reshape((28, 28)),
                            cmap='gray')
            ax[i, j].axis('off')

    plt.show()
Example No. 47
0
                                       high_pass=0.01,
                                       t_r=2.5,
                                       memory='nilearn_cache',
                                       memory_level=1,
                                       verbose=2)

func_filename = adhd_dataset.func[0]
confound_filename = adhd_dataset.confounds[0]

time_series = masker.fit_transform(func_filename,
                                   confounds=[confound_filename])

# Computing precision matrices ################################################
from sklearn.covariance import LedoitWolf
cve = LedoitWolf()
cve.fit(time_series)

# Displaying results ##########################################################
import matplotlib.pyplot as plt
from nilearn import plotting

# Display time series
for time_serie, label in zip(time_series.T, labels):
    plt.plot(time_serie, label=label)

plt.title('Default Mode Network Time Series')
plt.xlabel('Scan number')
plt.ylabel('Normalized signal')
plt.legend()
plt.tight_layout()
X = df_2.values[1:, :]

window_size = 300
slide_size = 30
no_samples = X.shape[0]
p = X.shape[1]
no_runs = math.floor((no_samples - window_size) / (slide_size))
print("We're running %s times" % no_runs)

X_new = X[0:window_size, :]
#ss = StandardScaler()
#X_new = ss.fit_transform(X_new)
#s = space.SPACE_BIC(verbose=True)
#s.fit_l2(X_new)
lw = LedoitWolf()
lw.fit(X_new)
prec = precision_matrix_to_partial_corr(lw.precision_)
l = lw.shrinkage_

np.fill_diagonal(prec, 0)
corr = covariance_matrix_to_corr(lw.covariance_)
np.fill_diagonal(corr, 0)
G = nx.from_numpy_array(corr)  # from_numpy_matrix was removed in networkx 3.x
G = nx.relabel_nodes(G, dict(zip(G.nodes(), company_names)))
node_attributes = dict(zip(company_names, company_sectors))
nx.set_node_attributes(G, node_attributes, 'sector')
G.graph['l'] = l
nx.write_graphml(G, "network_over_time_%s.graphml" % 0)
print("%s non-zero values" % np.count_nonzero(prec))
np.save("prec_0", lw.precision_)
Example No. 49
0
    import sklearn
except ImportError:
    has_sklearn = False
    print('sklearn not available')


def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr

if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MinCovDet

    lw = LedoitWolf(store_precision=False, assume_centered=False)
    lw.fit(rr)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False, assume_centered=False)
    oas.fit(rr)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MinCovDet()  # MCD was renamed MinCovDet in modern scikit-learn
    mcd.fit(rr)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
# settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")

plt.plot(plt.xlim(), 2 * [loglik_real], '--r',