Example no. 1
import numpy as np
from sklearn.covariance import LedoitWolf
from sklearn.preprocessing import StandardScaler

def lw(data, alphas):
    """
        Estimates the graph with the Ledoit-Wolf estimator.

        Parameters
        ----------
        data: numpy ndarray
            The input data to reconstruct/estimate a graph from. Features as columns and observations as rows.
        alphas: float
            The threshold on the precision matrix to determine edges.

        Returns
        -------
        adjacency matrix : the estimated adjacency matrix.
    """
    alpha = alphas
    # Standardize, fit Ledoit-Wolf, then threshold the precision matrix
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    cov = LedoitWolf().fit(data)
    precision_matrix = cov.get_precision()
    n_features, _ = precision_matrix.shape
    adjacency_matrix = np.zeros((n_features, n_features))
    adjacency_matrix[np.abs(precision_matrix) > alpha] = 1
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
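A minimal usage sketch for the function above, on synthetic data (the data and the threshold value are illustrative, not from the original project):

rng = np.random.RandomState(0)
data = rng.normal(size=(200, 5))   # 200 observations, 5 features
adj = lw(data, alphas=0.05)        # illustrative threshold on the precision matrix
print(adj.shape)                   # (5, 5) binary adjacency matrix, zero diagonal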
Example no. 2
File: ctm.py Project: happyche/code
	def maximization(self):
		# mean maximization
		for i in range(self._K):
			mu[i] = mu_ss[i] / ndata_ss
		# covariance maximization
		for i in range(self._K):
			for j in range(self._K):
				cov[i,j] = (1.0/ ndata_ss) * cov_ss[i,j] + ndata_ss * mu[i] * mu[j] - mu_ss[i] * mu[j] - mu_ss[j] * mu[i]
		# covariance shrinkage
		# (assume_centered is a constructor parameter of LedoitWolf,
		# not a fit() argument)
		lw = LedoitWolf(assume_centered=True)
		cov_result = lw.fit(cov).covariance_
		inv_cov = np.linalg.inv(cov_result)
		log_det_inv_cov = np.log(np.linalg.det(inv_cov))

		# topic maximization
		for i in range(self._K):
			sum_m = 0 
			for j in range(self._W):
				sum_m += beta_ss[i,j]

			if sum_m == 0:
				sum_m = -1000 * self._W
			else:
				sum_m = np.log(sum_m)
			for j in range(self._W):
				# normalize in log space: log(beta) minus the log of the row sum
				log_beta[i,j] = np.log(beta_ss[i,j]) - sum_m
Example no. 3
def test_ledoit_wolf_small():
    # Compare our blocked implementation to the naive implementation
    X_small = X[:, :4]
    lw = LedoitWolf()
    lw.fit(X_small)
    shrinkage_ = lw.shrinkage_

    assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
Example no. 5
def LW_est(X):
    '''
    Ledoit-Wolf covariance estimate with the optimal shrinkage coefficient.
    X_size = (n_samples, n_features)
    '''

    lw = LedoitWolf()
    cov_lw = lw.fit(X).covariance_

    return cov_lw
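For reference, scikit-learn's LedoitWolf returns the empirical covariance shrunk toward a scaled identity by the estimated coefficient, i.e. covariance_ = (1 - shrinkage_) * S + shrinkage_ * (tr(S)/p) * I. A small sketch verifying that identity on synthetic data (names are illustrative):

import numpy as np
from sklearn.covariance import LedoitWolf, empirical_covariance

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(60, 5))

lw_demo = LedoitWolf().fit(X_demo)
emp = empirical_covariance(X_demo)
mu = np.trace(emp) / emp.shape[0]            # average eigenvalue, tr(S)/p
manual = (1 - lw_demo.shrinkage_) * emp + lw_demo.shrinkage_ * mu * np.eye(5)
assert np.allclose(manual, lw_demo.covariance_)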
Example no. 6
def covarianceEstimation(daily_returns, cov_estimator):
    lw = LedoitWolf()
    if cov_estimator == "shrinkage":
        return lw.fit(daily_returns).covariance_
    elif cov_estimator == "empirical":
        return daily_returns.cov()
    elif cov_estimator == "multifactor":
        # FIXME
        return None
    else:
        raise Exception("协方差矩阵类型为[shrinkage,empirical,multifactor]")
Example no. 7
    def shrinked_covariance(returns, price_data=False, shrinkage_type='basic', assume_centered=False,
                            basic_shrinkage=0.1):
        """
        Calculates the Covariance estimator with shrinkage for a dataframe of asset prices or returns.

        This function allows three types of shrinkage - Basic, Ledoit-Wolf and Oracle Approximating Shrinkage.
        It is a wrap of the sklearn's ShrunkCovariance, LedoitWolf and OAS classes. According to the
        scikit-learn User Guide on Covariance estimation:

        "Sometimes, it even occurs that the empirical covariance matrix cannot be inverted for numerical
        reasons. To avoid such an inversion problem, a transformation of the empirical covariance matrix
        has been introduced: the shrinkage. Mathematically, this shrinkage consists in reducing the ratio
        between the smallest and the largest eigenvalues of the empirical covariance matrix".

        Link to the documentation:
        <https://scikit-learn.org/stable/modules/covariance.html>`_

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimators class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns. (False by default)
        :param shrinkage_type: (str) Type of shrinkage to use. (``basic`` by default, ``lw``, ``oas``, ``all``)
        :param assume_centered: (bool) Flag for data with mean almost, but not exactly zero.
                                       (Read documentation for chosen shrinkage class, False by default)
        :param basic_shrinkage: (float) Between 0 and 1. Coefficient in the convex combination for basic shrinkage.
                                        (0.1 by default)
        :return: (np.array) Estimated covariance matrix. Tuple of covariance matrices if shrinkage_type = ``all``.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimators()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Calculating the covariance matrix for the chosen method
        if shrinkage_type == 'basic':
            cov_matrix = ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(
                returns).covariance_
        elif shrinkage_type == 'lw':
            cov_matrix = LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_
        elif shrinkage_type == 'oas':
            cov_matrix = OAS(assume_centered=assume_centered).fit(returns).covariance_
        else:
            cov_matrix = (
                ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(returns).covariance_,
                LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_,
                OAS(assume_centered=assume_centered).fit(returns).covariance_)

        return cov_matrix
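The "basic" branch above is scikit-learn's ShrunkCovariance, whose estimate is the convex combination (1 - shrinkage) * S + shrinkage * (tr(S)/p) * I; this is exactly the eigenvalue-ratio reduction described in the quoted User Guide passage. A small sketch checking that relationship on synthetic data (values illustrative):

import numpy as np
from sklearn.covariance import ShrunkCovariance, empirical_covariance

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(100, 4))

alpha = 0.1                                  # illustrative shrinkage coefficient
emp = empirical_covariance(X_demo)
mu = np.trace(emp) / emp.shape[0]
manual = (1 - alpha) * emp + alpha * mu * np.eye(4)
assert np.allclose(manual, ShrunkCovariance(shrinkage=alpha).fit(X_demo).covariance_)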
Example no. 8
def similarity_measure_mahalanobis(ds_tar, ds_src, results, p_value=0.95):

    print('Computing Mahalanobis similarity...')

    # TODO: The function parameters must be the two datasets,
    # TODO: src is the one with parameter calculation, second is the similarity one

    #  Get classifier from results
    classifier = results['fclf']

    # Make prediction on training set, to understand data distribution
    ## TODO: Evaluate if it is correct!
    classifier_predictions_src = classifier.predict(ds_src)
    prediction_mask = np.array(classifier_predictions_src) == ds_src.targets
    example_dist = dict()

    # Extract feature selected from each dataset
    if isinstance(classifier, FeatureSelectionClassifier):
        f_selection = results['fclf'].mapper
        ds_tar = f_selection(ds_tar)
        ds_src = f_selection(ds_src)
    '''
    Get class distribution information: mean and covariance
    '''

    for label in np.unique(ds_src.targets):

        # Get examples correctly classified
        mask = ds_src.targets == label
        example_dist[label] = dict()
        true_ex = ds_src.samples[mask * prediction_mask]

        # Get Mean and Covariance to draw the distribution
        # We evaluate mean and cov only on well-classified examples
        mean_ = np.mean(true_ex, axis=0)
        example_dist[label]['mean'] = mean_

        print('Estimation of covariance matrix for ' + label + ' class...')
        print(true_ex.shape)

        try:
            # covariance estimators are fitted with .fit(); the fitted object
            # exposes covariance_ and precision_
            #cov_ = MinCovDet().fit(true_ex)
            cov_ = LedoitWolf().fit(true_ex)
            #cov_ = EmpiricalCovariance().fit(true_ex)
            #cov_ = GraphLasso(alpha=0.5).fit(true_ex)
            #cov_ = OAS().fit(true_ex)
        except MemoryError:
            print('Method is LedoitWolf')
            cov_ = LedoitWolf(block_size=15000).fit(true_ex)

        example_dist[label]['i_cov'] = cov_.precision_
        print('Inverted covariance estimated...')
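A sketch of how the stored mean and precision would feed a Mahalanobis distance downstream (synthetic data; scipy's mahalanobis takes the inverse covariance directly, and location_/precision_ are standard attributes of a fitted sklearn covariance estimator):

import numpy as np
from scipy.spatial.distance import mahalanobis
from sklearn.covariance import LedoitWolf

rng = np.random.RandomState(0)
true_ex_demo = rng.normal(size=(50, 3))
est = LedoitWolf().fit(true_ex_demo)
d = mahalanobis(true_ex_demo[0], est.location_, est.precision_)
print(d)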
Example no. 9
def partial_corrconn(activity_matrix,
                     estimator='EmpiricalCovariance',
                     target_ts=None):
    """
    activity_matrix:    Activity matrix should be nodes X time
    target_ts:             Optional, used when only a single target time series (returns 1 X nnodes matrix)
    estimator:      can be either 'Empirical covariance' the default, or 'LedoitWolf' partial correlation with Ledoit-Wolf shrinkage

    Output: connectivity_mat, formatted targets X sources
    Credit goes to nilearn connectivity_matrices.py which contains code that was simplified for this use.
    """

    nnodes = activity_matrix.shape[0]
    timepoints = activity_matrix.shape[1]
    if nnodes > timepoints:
        print('activity_matrix shape: ', np.shape(activity_matrix))
        raise Exception(
            'More nodes (regressors) than timepoints! Use regularized regression'
        )
    if 2 * nnodes > timepoints:
        print('activity_matrix shape: ', np.shape(activity_matrix))
        print('Consider using a shrinkage method')

    if target_ts is None:
        connectivity_mat = np.zeros((nnodes, nnodes))
        # calculate covariance
        if estimator == 'LedoitWolf':   # compare strings with ==, not 'is'
            cov_estimator = LedoitWolf(store_precision=False)
        elif estimator == 'EmpiricalCovariance':
            cov_estimator = EmpiricalCovariance(store_precision=False)
        covariance = cov_estimator.fit(activity_matrix.T).covariance_

        # calculate precision
        precision = linalg.inv(covariance)

        # precision to partial corr
        diagonal = np.atleast_2d(1. / np.sqrt(np.diag(precision)))
        correlation = precision * diagonal * diagonal.T

        # Force exact 0. on diagonal
        np.fill_diagonal(correlation, 0.)
        connectivity_mat = -correlation
    else:
        #Computing values for a single target node
        connectivity_mat = np.zeros((nnodes, 1))
        X = activity_matrix.T
        y = target_ts
        #Note: LinearRegression fits intercept by default (intercept beta not included in coef_ output)
        reg = LinearRegression().fit(X, y)
        connectivity_mat = reg.coef_

    return connectivity_mat
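A minimal usage sketch (synthetic data; assumes the module's imports of numpy, scipy.linalg and the scikit-learn estimators are in place):

rng = np.random.RandomState(0)
activity = rng.normal(size=(8, 200))            # 8 nodes, 200 timepoints
pc_mat = partial_corrconn(activity, estimator='LedoitWolf')
print(pc_mat.shape)                             # (8, 8), zero diagonal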
Example no. 10
def test_ledoit_wolf_large():
    # test that ledoit_wolf doesn't error on data that is wider than block_size
    rng = np.random.RandomState(0)
    # use a number of features that is larger than the block-size
    X = rng.normal(size=(10, 20))
    lw = LedoitWolf(block_size=10).fit(X)
    # check that covariance is about diagonal (random normal noise)
    assert_almost_equal(lw.covariance_, np.eye(20), 0)
    cov = lw.covariance_

    # check that the result is consistent with not splitting data into blocks.
    lw = LedoitWolf(block_size=25).fit(X)
    assert_almost_equal(lw.covariance_, cov)
Example no. 11
    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = float(k)
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' % covariance_estimator)

        self.x0 = None
        self.x1 = None
Example no. 12
def max_IC_weight(ic_df,
                  factors_dict,
                  holding_period,
                  covariance_type="shrink"):
    """
    输入ic_df(ic值序列矩阵),指定持有期和滚动窗口,给出相应的多因子组合权重
    :param factors_dict: 若干因子组成的字典(dict),形式为:
                         {"factor_name_1":factor_1,"factor_name_2":factor_2}
                        每个因子值格式为一个pd.DataFrame,索引(index)为date,column为asset
    :param ic_df: ic值序列矩阵 (pd.Dataframe),索引(index)为datetime,columns为各因子名称。
             如:

                       BP	   CFP	   EP	  ILLIQUIDITY	REVS20	   SRMI	   VOL20
            date
            2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832	0.214377	0.068445
            2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890	0.202724	0.081748
            2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691	0.122554	0.042489
            2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805	0.053339	0.079592
            2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902	0.077293	-0.050667

    :param holding_period: 持有周期(int)
    :param covariance_type:"shrink"/"simple" 协防差矩阵估算方式 Ledoit-Wolf压缩估计或简单估计
    :return: weight_df:使用Sample协方差矩阵估算方法得到的因子权重(pd.Dataframe),
             索引(index)为datetime,columns为待合成的因子名称。
    """
    weight_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
    lw = LedoitWolf()
    # Maximize day t's IC; data up to t+period is used (to compute returns),
    # and the resulting weights are applied to the factors at t+period
    for dt in ic_df.index:
        f_dt = pd.concat([
            factors_dict[factor_name].loc[dt] for factor_name in ic_df.columns
        ],
                         axis=1).dropna()
        if len(f_dt) == 0:
            continue
        if covariance_type == "shrink":
            try:
                f_cov_mat = lw.fit(f_dt.as_matrix()).covariance_
            except:
                f_cov_mat = np.mat(np.cov(f_dt.T.as_matrix()).astype(float))
        else:
            f_cov_mat = np.mat(np.cov(f_dt.T.as_matrix()).astype(float))
        inv_f_cov_mat = np.linalg.inv(f_cov_mat)
        weight = inv_f_cov_mat * np.mat(ic_df.loc[dt].values).reshape(
            len(inv_f_cov_mat), 1)
        weight = np.array(weight.reshape(len(weight), ))[0]
        weight_df.loc[dt] = weight / np.sum(np.abs(weight))

    return weight_df.shift(holding_period)
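The core step above is w ∝ Σ⁻¹ · IC followed by L1 normalization. A small numeric sketch with illustrative values:

import numpy as np

ic = np.array([0.10, 0.05, -0.02])
cov = np.array([[0.04, 0.01, 0.00],
                [0.01, 0.03, 0.01],
                [0.00, 0.01, 0.05]])
w = np.linalg.solve(cov, ic)   # equivalent to inv(cov) @ ic, but more stable
w /= np.abs(w).sum()           # L1 normalization, as for weight_df above
print(w)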
Example no. 13
    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = float(k)
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' %
                                      covariance_estimator)

        self.x0 = None
        self.x1 = None
Example no. 14
	def maximization(self):
		'''
		M-step of the EM algorithm; uses scikit-learn's LedoitWolf estimator
		to perform covariance matrix shrinkage.
		Arguments:
			sufficient statistics, i.e. model parameters
		Returns:
			nothing; the updated sufficient statistics are all stored on self
		'''
		logger.info("running maximization function")
		logger.info("mean maximization")
		mu = np.divide(self.mu, self.ndata)
		logger.info("covariance maximization")
		for i in range(self._K):
			for j in range(self._K):
				self.cov[i, j] = (1.0 / self.ndata) * self.cov[i, j] + self.ndata * mu[i] * mu[j] - self.mu[i] * mu[j] - self.mu[j] * mu[i]
		logger.info(" performing covariance shrinkage using sklearn module")
		lw = LedoitWolf()
		cov_result = lw.fit(self.cov, assume_centered=True).covariance_
		self.inv_cov = np.linalg.inv(cov_result)
		self.log_det_inv_cov = math_utli.safe_log(np.linalg.det(self.inv_cov))

		logger.info("topic maximization")
		for i in range(self._K):
			# total mass of topic i across the W vocabulary entries
			sum_m = np.sum(self.beta, axis=1)[i]

			if sum_m == 0:
				sum_m = -1000 * self._W
			else:
				sum_m = np.log(sum_m)

			for j in range(self._W):
				# normalize in log space: safe_log(beta) minus the log normalizer
				self.log_beta[i, j] = math_utli.safe_log(self.beta[i, j]) - sum_m

		logger.info("write model parameters to file")
		logger.info("write gaussian")
		with open('ctm_nu', 'w') as ctm_nu_dump:
			cPickle.dump(self.nu, ctm_nu_dump)
		with open('ctm_cov', 'w') as ctm_cov_dump:
			cPickle.dump(self.cov, ctm_cov_dump)
		with open('ctm_inv_cov', 'w') as ctm_inv_cov_dump:
			cPickle.dump(self.inv_cov, ctm_inv_cov_dump)
		with open('ctm_log_det_inv_cov', 'w') as ctm_log_det_inv_cov_dump:
			cPickle.dump(self.log_det_inv_cov, ctm_log_det_inv_cov_dump)
		logger.info("write topic matrix")
		with open('ctm_log_beta', 'w') as ctm_log_beta_dump:
			cPickle.dump(self.log_beta, ctm_log_beta_dump)
Example no. 15
 def estimatorLedoitWolf(self):
     #remove Date column for this function
     trimmedData = self.data.drop('Date', axis=1)
     cov = LedoitWolf().fit(trimmedData).covariance_  #centers the data
     assert cov.shape == self.expectedCovShape
     self.cov = cov
     return self.cov
Example no. 16
def filter_W_fromVcv(vcv, variance_perc=1.0):
    '''vcv is a filtered value for the Vcv, with shapes T,N,N.
    It filters init_W,init_df that are the initial distribution parameters for W's posterior.
    W is the diffusion matrix of the components of the cholesky-decomposition of vcv.
    It filters also init_vcv_std, the standard deviations of this components' posteriors.
    '''
    [T, N, _] = vcv.shape
    num_tril = int(N * (N + 1) / 2)
    chol_vcv = np.zeros([T, int(N * (N + 1) / 2)])
    ind = indexes_librarian(N)
    for t in range(T):
        cvcv = np.linalg.cholesky(vcv[t])
        chol_vcv[t, ind.spiral_diag] = inv_softplus(cvcv[ind.diag[0],
                                                         ind.diag[1]])
        chol_vcv[t, ind.spiral_udiag] = cvcv[ind.udiag[0], ind.udiag[1]]
    cov = LedoitWolf().fit(chol_vcv[1:, :] - chol_vcv[:-1, :])
    init_W = cov.covariance_
    try:
        np.linalg.cholesky(init_W)
    except np.linalg.LinAlgError:
        # add a small ridge term if init_W is singular
        print('W resulted singular, a correction term (I*1e-4) is added')
        init_W += np.eye(num_tril) * 1e-4
    init_df = np.max([4 * num_tril / variance_perc, num_tril])
    init_W *= 2
    init_vcv_std = np.abs(chol_vcv) * 0.1 / N * variance_perc
    #init_vcv_std=np.tile(np.reshape(np.abs(vcv).mean(axis=0),[1,N,N]),[T,1,1])/np.sqrt(N)*variance_perc
    return np.float32(init_W), np.float32(init_df), np.float32(init_vcv_std)
Example no. 17
def query_samples_and_probabilities(pydc,query,evidence,var,std=False):
  pydc.queryWithSamples(NUM_SAMPLES,query,evidence,var,FLAG,BIGNUM)
  parsed_samples = ast.literal_eval(pydc.samples)
  values = []
  weights = []
  for sample in parsed_samples:
    x, w = sample[0], sample[1]
    values  += [x]
    weights += [w]
  values, weights = np.array(values), np.array(weights)
  if std:
    avg, std = weighted_avg_and_std(values, weights)
    return avg, std
  else:
    #values = values + 1e-5*np.random.rand(*values.shape)
    avg, cov = weighted_avg_and_cov(values, weights)
    print(avg)
    #X = np.random.multivariate_normal(mean=avg,cov=cov,size=100)
    #shcov = LedoitWolf().fit(X)
    #assert cov is positive-semidefinite
    try:
      assert(np.all(np.linalg.eigvals(cov) >= 0))
    except AssertionError:
      X = np.random.multivariate_normal(mean=avg,cov=cov,size=100)
      shcov = LedoitWolf().fit(X)
      avg, cov = shcov.location_, shcov.covariance_
      #assert(np.all(np.linalg.eigvals(cov) >= 0))
    return (avg, cov)
Example no. 18
    def _simulate_covariance(mu_vector, cov_matrix, num_obs, lw_shrinkage=False):
        """
        Derives an empirical vector of means and an empirical covariance matrix.

        Based on the set of true means vector and covariance matrix of X distributions,
        the function generates num_obs observations for every X.
        Based on these observations simulated vector of means and the simulated covariance
        matrix are obtained.

        :param mu_vector: (np.array) True means vector for X distributions
        :param cov_matrix: (np.array) True covariance matrix for X distributions
        :param num_obs: (int) Number of observations to draw for every X
        :param lw_shrinkage: (bool) Flag to apply Ledoit-Wolf shrinkage to X (False by default)
        :return: (np.array, np.array) Empirical means vector, empirical covariance matrix
        """

        # Generating a matrix of num_obs observations for X distributions
        observations = np.random.multivariate_normal(mu_vector.flatten(), cov_matrix, size=num_obs)

        # Empirical means vector calculation
        mu_simulated = observations.mean(axis=0).reshape(-1, 1)

        if lw_shrinkage:  # If applying Ledoit-Wolf shrinkage
            cov_simulated = LedoitWolf().fit(observations).covariance_

        else:  # Simple empirical covariance matrix
            cov_simulated = np.cov(observations, rowvar=False)

        return mu_simulated, cov_simulated
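A minimal calling sketch (synthetic inputs; assumes the method is reachable as a plain function and that numpy and LedoitWolf are imported as in the surrounding module):

mu_true = np.array([[0.0], [1.0]])
cov_true = np.array([[1.0, 0.3],
                     [0.3, 2.0]])
mu_hat, cov_hat = _simulate_covariance(mu_true, cov_true, num_obs=500,
                                       lw_shrinkage=True)
print(mu_hat.ravel(), cov_hat.shape)   # roughly [0, 1]; (2, 2)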
Example no. 19
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas
    """

    ts = do_mask_img(masker, func, confound)

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()


    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example no. 20
def weight_opt(returns,benchmark, lower = 0, upper = 1, ph=2**7, cov_method='sample', seed = 123):
    np.random.seed(seed)
    n_asset, n_sample = returns.shape
    rets = np.asmatrix(returns)  
    #N = 10
    #phs = [2**(t-2) for t in range(N)]  
    # Convert to cvxopt matrices 
    if cov_method == 'sample':
        Cov = opt.matrix(np.cov(rets,benchmark))
    elif cov_method == 'lw':
        Cov = opt.matrix(LedoitWolf().fit(np.append(np.transpose(rets),benchmark.reshape(n_sample,1), axis=1)).covariance_)
    else:
        raise ValueError('cov_method should be in {}'.format({'sample', 'lw'}))
    S = Cov[:n_asset,:n_asset]
    r_mean = opt.matrix(np.nanmean(rets, axis=1)) # n*1
    Cb = Cov[:n_asset,n_asset]
    # Create constraint matrices  
    G = opt.matrix(np.append(np.eye(n_asset),-np.eye(n_asset),axis = 0))   # 2n x n identity matrix  
    h = opt.matrix(np.append(upper*np.ones((n_asset,1)),-lower*np.ones((n_asset,1)),axis = 0)) 
    A = opt.matrix(1.0, (1, n_asset))  
    b = opt.matrix(1.0)  
    # Calculate efficient frontier weights using quadratic programming  
    x = solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x']
    #portfolios = [solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x']  
    #              for ph in phs]  
    # CALCULATE RISKS AND RETURNS FOR FRONTIER  
    ret = blas.dot(r_mean, x)
    #[blas.dot(r_mean, x) for x in portfolios]  
    errors = blas.dot(x, S*x)+Cov[n_asset,n_asset]-2*blas.dot(Cb,x)
    #[blas.dot(x, S*x)+Cov[n_asset,n_asset]-2*blas.dot(Cb,x) for x in portfolios]  
    return np.transpose(np.array(x))[0], ret, errors#, ret_opt, risk_opt   
Example no. 21
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas
    """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()


    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example no. 22
def test_lda_predict():
    # Test LDA classification.
    # This checks that LDA implements fit and predict and returns correct
    # values for simple toy data.
    for test_case in solver_shrinkage:
        solver, shrinkage = test_case
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        y_pred = clf.fit(X, y).predict(X)
        assert_array_equal(y_pred, y, "solver %s" % solver)

        # Assert that it works with 1D data
        y_pred1 = clf.fit(X1, y).predict(X1)
        assert_array_equal(y_pred1, y, "solver %s" % solver)

        # Test probability estimates
        y_proba_pred1 = clf.predict_proba(X1)
        assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y,
                           "solver %s" % solver)
        y_log_proba_pred1 = clf.predict_log_proba(X1)
        assert_allclose(
            np.exp(y_log_proba_pred1),
            y_proba_pred1,
            rtol=1e-6,
            atol=1e-6,
            err_msg="solver %s" % solver,
        )

        # Primarily test for commit 2f34950 -- "reuse" of priors
        y_pred3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert np.any(y_pred3 != y3), "solver %s" % solver

    clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
    with pytest.raises(NotImplementedError):
        clf.fit(X, y)

    clf = LinearDiscriminantAnalysis(solver="lsqr",
                                     shrinkage=0.1,
                                     covariance_estimator=ShrunkCovariance())
    with pytest.raises(
            ValueError,
            match=("covariance_estimator and shrinkage "
                   "parameters are not None. "
                   "Only one of the two can be set."),
    ):
        clf.fit(X, y)

    # test bad solver with covariance_estimator
    clf = LinearDiscriminantAnalysis(solver="svd",
                                     covariance_estimator=LedoitWolf())
    with pytest.raises(ValueError,
                       match="covariance estimator is not supported with svd"):
        clf.fit(X, y)

    # test bad covariance estimator
    clf = LinearDiscriminantAnalysis(solver="lsqr",
                                     covariance_estimator=KMeans(
                                         n_clusters=2, n_init="auto"))
    with pytest.raises(ValueError):
        clf.fit(X, y)
Example no. 23
    def __call__(self, train_list, rest_list, clear_after_use=False):

        print("Apply Whitening...")

        if clear_after_use:
            self.sigma_neg_sqrt = None
            self.shrinkage_parameter = None

        if self.sigma_neg_sqrt is None:
            train_stacked = np.concatenate([d.x for d in train_list], axis=0)
            # Fit LedoitWolf for covariance estimation
            lw = LedoitWolf().fit(train_stacked)
            self.shrinkage_parameter = lw.shrinkage_
            print("   Estimated shrinkage-parameter={:.3f}".format(
                self.shrinkage_parameter))
            # estimated covariance matrix
            sigma = lw.covariance_
            # eigenvalue decomposition
            eig_values, eig_vectors = np.linalg.eig(sigma)
            # negative square root of eigenvalues
            eig_values_neg_sqrt = np.diag(1 / np.sqrt(eig_values + self.eps))
            # negative square root of sigma
            self.sigma_neg_sqrt = np.dot(
                np.dot(eig_vectors, eig_values_neg_sqrt), eig_vectors.T)

        def tensor_whiten(data):
            x = data.x

            x = np.dot(x, self.sigma_neg_sqrt)

            return RawData.create_from_ref(data, x=x)

        return self.transform(tensor_whiten, train_list, rest_list)
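A sketch of the whitening identity used above: with sigma_neg_sqrt = U · diag(1/sqrt(λ)) · Uᵀ, the transformed data has approximately identity covariance (only approximately, because the shrunk rather than the empirical covariance is inverted). Synthetic data; eigh is used here since the covariance is symmetric:

import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(500, 4)) @ rng.normal(size=(4, 4))  # correlated data

sigma = LedoitWolf().fit(X_demo).covariance_
vals, vecs = np.linalg.eigh(sigma)
neg_sqrt = vecs @ np.diag(1.0 / np.sqrt(vals)) @ vecs.T
X_white = (X_demo - X_demo.mean(axis=0)) @ neg_sqrt
print(np.round(np.cov(X_white, rowvar=False), 2))             # near-identity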
Example no. 24
def postProcessing(nifti_file, subject_key, spheres_masker):
    """Perform post-processing.
    :param nifti_file: string. Path to the NIfTI file.
    :param subject_key: string. Subject's key.
    :return: tuple (subject_key, value), where value is
        {"time_series": matrix of time series (time_points, rois),
         "covariance": covariance matrix of atlas rois (rois, rois),
         "correlation": correlation matrix of atlas rois (rois, rois)}
    """
    try:
        print("subject_key: " + subject_key)
        print("Extract timeseries")
        # Extract the time series
        print(nifti_file)
        timeseries = spheres_masker.fit_transform(nifti_file, confounds=None)
        print("Extract covariance matrix")
        cov_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
            assume_centered=False, block_size=1000, store_precision=False),
                                          kind='covariance')
        cov = []
        cor = []
        cov = cov_measure.fit_transform([timeseries])[0, :, :]
        print("Extract correlation matrix")
        cor = nilearn.connectome.cov_to_corr(cov)
    except Exception:
        raise Exception("subject_key: %s \n" % subject_key +
                        traceback.format_exc())
    return (subject_key, {
        "time_series": timeseries,
        "covariance": cov,
        "correlation": cor
    })
Example no. 25
    def __init__(self, sharpes, returns):
        """
        Initialize AuthorModelBuilder object.

        Parameters
        ----------
        sharpes : pd.DataFrame
            Long-format DataFrame of in-sample Sharpe ratios (from user-run
            backtests), indexed by user, algorithm and code ID.
            Note that currently, backtests are deduplicated based on code id.
            See fit_authors for more information.
        """
        self.num_authors = sharpes.meta_user_id.nunique()
        self.num_algos = sharpes.meta_algorithm_id.nunique()
        # For num_backtests, nunique() and count() should be the same
        self.num_backtests = sharpes.meta_code_id.nunique()

        # Which algos correspond to which authors?
        df = (sharpes.loc[:, ['meta_user_id', 'meta_algorithm_id']].
              drop_duplicates(
                  subset='meta_algorithm_id',
                  keep='first').reset_index().meta_user_id.astype(str))
        self.author_to_algo_encoding = LabelEncoder().fit_transform(df)

        # Which backtests correspond to which algos?
        df = sharpes.meta_algorithm_id.astype(str)
        self.algo_to_backtest_encoding = LabelEncoder().fit_transform(df)

        # Which backtests correspond to which authors?
        df = sharpes.meta_user_id.astype(str)
        self.author_to_backtest_encoding = LabelEncoder().fit_transform(df)

        # Construct correlation matrix.
        # 0 is a better estimate for mean returns than the sample mean!
        returns_ = returns / returns.std()
        self.corr = LedoitWolf(assume_centered=True).fit(returns_).covariance_

        self.model = self._build_model(sharpes, self.corr)

        self.coords = {
            'meta_user_id': sharpes.meta_user_id.drop_duplicates().values,
            'meta_algorithm_id':
            sharpes.meta_algorithm_id.drop_duplicates().values,
            'meta_code_id': sharpes.meta_code_id.values
        }

        self.dims = {
            'mu_global': (),
            'mu_author': ('meta_user_id', ),
            'mu_author_raw': ('meta_user_id', ),
            'mu_author_sd': (),
            'mu_algo': ('meta_algorithm_id', ),
            'mu_algo_raw': ('meta_algorithm_id', ),
            'mu_algo_sd': (),
            'mu_backtest': ('meta_code_id', ),
            'sigma_backtest': ('meta_code_id', ),
            'alpha_author': ('meta_user_id', ),
            'alpha_algo': ('meta_algorithm_id', )
        }
Example no. 26
def LedoitWolf_covMatrix(X):
    logger.info(
        'Computing the covariance matrix with shrinkage')
    cov = LedoitWolf().fit(X)
    cov_matrix = cov.covariance_
    mean_vector = cov.location_

    return cov_matrix, mean_vector
Example no. 27
def GetModelParams(DataFrame, ColumnIndex):

    cDataSet = DataFrame

    cData0 = cDataSet[cDataSet['target'] == 0]
    cData1 = cDataSet[cDataSet['target'] == 1]

    bData0 = np.array(cData0[ColumnIndex])
    bData1 = np.array(cData1[ColumnIndex])

    Cov0 = LedoitWolf(assume_centered=False).fit(bData0)
    Cov1 = LedoitWolf(assume_centered=False).fit(bData1)

    Mean0 = bData0.mean(axis=0)
    Mean1 = bData1.mean(axis=0)

    return Cov0.covariance_, Cov1.covariance_, Mean0, Mean1
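A minimal usage sketch (synthetic two-class DataFrame; the column names are illustrative, and the snippet's own numpy/pandas/LedoitWolf imports are assumed):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df_demo = pd.DataFrame(rng.normal(size=(100, 3)), columns=['f1', 'f2', 'f3'])
df_demo['target'] = rng.randint(0, 2, size=100)
cov0, cov1, mean0, mean1 = GetModelParams(df_demo, ['f1', 'f2', 'f3'])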
Example no. 28
def prior_vector_variability(x):
    """
    Estimate the covariance matrix of x with the LedoitWolf estimator
    :param x: an array of dim (t,n)
    :return: The estimated covariance matrix
    """
    dx = LedoitWolf().fit(x).covariance_
    return dx
Example no. 29
def simCovMu(mu0, cov0, nObs, shrink=False):
    x = np.random.multivariate_normal(mu0.flatten(), cov0, size=nObs)
    mu1 = x.mean(axis=0).reshape(-1, 1)  # mean of each column of the random matrix
    if shrink:
        cov1 = LedoitWolf().fit(x).covariance_
    else:
        cov1 = np.cov(x, rowvar=0)
    return mu1, cov1
Example no. 30
def max_IR_weight(ic_df,
                  holding_period,
                  rollback_period=120,
                  covariance_type="shrink"):
    """
    输入ic_df(ic值序列矩阵),指定持有期和滚动窗口,给出相应的多因子组合权重
    :param ic_df: ic值序列矩阵 (pd.Dataframe),索引(index)为datetime,columns为各因子名称。
             如:

                       BP	   CFP	   EP	  ILLIQUIDITY	REVS20	   SRMI	   VOL20
            date
            2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832	0.214377	0.068445
            2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890	0.202724	0.081748
            2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691	0.122554	0.042489
            2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805	0.053339	0.079592
            2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902	0.077293	-0.050667

    :param holding_period: 持有周期(int)
    :param rollback_period: 滚动窗口,即计算每一天的因子权重时,使用了之前rollback_period下的IC时间序列来计算IC均值向量和IC协方差矩阵(int)。
    :param covariance_type:"shrink"/"simple" 协防差矩阵估算方式 Ledoit-Wolf压缩估计或简单估计
    :return: weight_df:使用Sample协方差矩阵估算方法得到的因子权重(pd.Dataframe),
             索引(index)为datetime,columns为待合成的因子名称。
    """
    # Maximize the IC-IR over days t-n .. t; data up to t+period is used
    # (to compute returns), and the resulting weights are applied to the
    # factors at t+period
    n = rollback_period
    weight_df = pd.DataFrame(index=ic_df.index, columns=ic_df.columns)
    lw = LedoitWolf()
    for dt in ic_df.index:
        ic_dt = ic_df[ic_df.index <= dt].tail(n)
        if len(ic_dt) < n:
            continue
        if covariance_type == "shrink":
            try:
                ic_cov_mat = lw.fit(ic_dt.as_matrix()).covariance_
            except:
                ic_cov_mat = np.mat(np.cov(ic_dt.T.as_matrix()).astype(float))
        else:
            ic_cov_mat = np.mat(np.cov(ic_dt.T.as_matrix()).astype(float))
        inv_ic_cov_mat = np.linalg.inv(ic_cov_mat)
        weight = inv_ic_cov_mat * np.mat(ic_dt.mean().values).reshape(
            len(inv_ic_cov_mat), 1)
        weight = np.array(weight.reshape(len(weight), ))[0]
        weight_df.loc[dt] = weight / np.sum(np.abs(weight))

    return weight_df.shift(holding_period)
Example no. 31
def untangle(X: Iterable,
             y: Iterable,
             n_clusters: int = None,
             get_connectivity: bool = True,
             compute_distances: bool = True,
             kind: str = 'correlation',
             agglo_kws: Union[dict, Bunch] = None) -> FeatureAgglomeration:

    from nilearn.connectome import ConnectivityMeasure as CM
    from sklearn.cluster import FeatureAgglomeration
    from sklearn.covariance import LedoitWolf
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import mutual_info_classif

    agglo_defs = dict(affinity='euclidean',
                      compute_full_tree='auto',
                      linkage='ward',
                      pooling_func=np.mean,
                      distance_threshold=None,
                      compute_distances=compute_distances)

    if get_connectivity is True:
        connect_mat = CM(LedoitWolf(), kind=kind).fit_transform([X.values])[0]
    else:
        connect_mat = None

    if n_clusters is None:
        n_clusters = divmod(X.shape[1], 2)[0] - 1
        if n_clusters == 0:
            n_clusters = 1

    if agglo_kws is None:
        agglo_kws = {}
    agglo_defs.update(agglo_kws)

    agglo = FeatureAgglomeration(n_clusters=n_clusters,
                                 connectivity=connect_mat,
                                 **agglo_defs)
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)

    agglo.fit(X, y)

    setattr(
        agglo, 'cluster_indexes_',
        pd.DataFrame(zip(agglo.labels_, agglo.feature_names_in_),
                     columns=['cluster',
                              'feature']).groupby('cluster').feature)

    skb = SelectKBest(k=1, score_func=mutual_info_classif)
    factor_leaders_ = [
        skb.fit(X[itm[1]], y).get_feature_names_out()[0]
        for itm in tuple(agglo.cluster_indexes_)
    ]
    setattr(agglo, 'factor_leaders_', factor_leaders_)
    return agglo
Example no. 32
def connectivity(subjects_ts, kinds=kinds, saveas='file'):
    """
    Estimates Functional Connectivity using several estimation models 
    Parameters
    ----------
    subjects_ts: array-like , 2-D (n_subjects,n_regions)
                 Array of BOLD time-series  
    
    kinds: list of kinds of connectivity measure to be computed . kinds include : 
        ' correlation ' , ' partial correlation', ' tangent' , 'covariance' . 
                                                
    
    saveas : Destination to save and load output (.npz)
    
    Returns
    ---------
    mean_connectivity_matrix: dictionary ,  {'kind' : (n_regions,n_regions)} 
                              Group-level functional connectivity matrix
    individual_connectivity_matrix: dictionary , {'kind' : (n_subjects,n_regions,n_regions)}
                              Subject-level functional connectivity matrices
                 
    """

    individual_connectivity_matrices = dict()

    mean_connectivity_matrix = dict()

    if os.path.exists(saveas):
        data = np.load(saveas)
        individual_connectivity_matrices = data['arr_0'].flatten()[0]
        mean_connectivity_matrix = data['arr_1'].flatten()[0]
    else:

        for kind in kinds:

            # Computing individual functional connectivity

            conn_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
                assume_centered=True, store_precision=True),
                                               kind=kind,
                                               vectorize=False,
                                               discard_diagonal=False)

            individual_connectivity_matrices[
                kind] = conn_measure.fit_transform(subjects_ts)

            # Computing group functional connectivity

            if kind == 'tangent':
                mean_connectivity_matrix[kind] = conn_measure.mean_
            else:
                mean_connectivity_matrix[kind] = \
                individual_connectivity_matrices[kind].mean(axis=0)
            np.savez(saveas, individual_connectivity_matrices,
                     mean_connectivity_matrix)

    return mean_connectivity_matrix, individual_connectivity_matrices
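A minimal usage sketch (synthetic subject time series; requires nilearn for ConnectivityMeasure, and the save path is illustrative):

import numpy as np

subjects_ts_demo = [np.random.RandomState(i).normal(size=(120, 10))
                    for i in range(5)]
mean_fc, indiv_fc = connectivity(subjects_ts_demo, kinds=['correlation'],
                                 saveas='fc_matrices.npz')
print(indiv_fc['correlation'].shape)   # (5, 10, 10)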
Example no. 33
 def __init__(self,
              cov_estimator=LedoitWolf(store_precision=False),
              kind='covariance',
              vectorize=False,
              discard_diagonal=False):
     self.cov_estimator = cov_estimator
     self.kind = kind
     self.vectorize = vectorize
     self.discard_diagonal = discard_diagonal
Example no. 34
def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """

    :param data:
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default):
        - Diagonal:
        - Shrinkage OAS:
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and empirical covariance est
    """

    try:
        # Offset data to make sure there are no 0 values for log transform
        offset = np.min(data) + 1
        offdata = data + offset

        # log on the offsetted data
        logdata = np.log(offdata)
        # Get the means
        meanslog = np.mean(logdata, axis=0)

        # Specify covariance
        # Regular covariance estimator
        if covtype == "Estimate":
            covlog = np.cov(logdata, rowvar=0)
        # Shrinkage covariance estimator, using LedoitWolf
        elif covtype == "ShrinkageLedoitWolf":
            scov = LedoitWolf()
            scov.fit(logdata)
            covlog = scov.covariance_
        elif covtype == "ShrinkageOAS":
            scov = OAS()
            scov.fit(logdata)
            covlog = scov.covariance_

        # Diagonal covariance matrix (no between variable correlation)
        elif covtype == "Diagonal":
            covlogdata = np.var(
                logdata, axis=0)  #get variance of log data by each column
            covlog = np.diag(
                covlogdata
            )  #generate a matrix with diagonal of variance of log Data
        else:
            raise ValueError('Unknown Covariance type')

        simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
        simData = np.exp(simData)
        simData -= offset

        ##Set to 0 negative values
        simData[np.where(simData < 0)] = 0
        # work out the correlation of matrix by columns, each column is a variable
        corrMatrix = np.corrcoef(simData, rowvar=0)

        return simData, corrMatrix

    except Exception as exp:
        raise exp
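A minimal usage sketch (synthetic positive data; assumes numpy, LedoitWolf and OAS are imported as in the surrounding module):

rng = np.random.RandomState(0)
data_demo = rng.lognormal(mean=0.0, sigma=0.5, size=(100, 3))
sim, corr = simulateLogNormal(data_demo, covtype='ShrinkageLedoitWolf',
                              nsamples=500)
print(sim.shape, corr.shape)   # (500, 3), (3, 3)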
Example no. 35
def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
    # Import data from .csv
    df = pd.read_csv(filePath, sep=';')
    df.index = df.date
    df = df.drop('date', axis=1)

    # Subset, if called via subset == True
    if subset == True:
        df = df.tail(subsetSize)

    # Estimate covariance using Empirical/MLE
    # Expected input is returns, hence set: assume_centered = True
    mleFitted = empirical_covariance(X=df, assume_centered=True)
    sigma = mleFitted

    if shrinkage == True:
        # Estimate covariance using LedoitWolf, first create instance of object
        lw = LedoitWolf(assume_centered=True)
        lwFitted = lw.fit(X=df).covariance_
        sigma = lwFitted

    return sigma
Example no. 36
base_X_train = np.random.normal(size=(n_samples, n_features))
base_X_test = np.random.normal(size=(n_samples, n_features))

# Color samples
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
    log_likelihood, empirical_covariance

# Ledoit-Wolf optimal shrinkage coefficient estimate
# (on current scikit-learn, assume_centered is a constructor parameter,
# not a fit()/score() argument)
lw = LedoitWolf(assume_centered=True)
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS(assume_centered=True)
loglik_oa = oa.fit(X_train).score(X_test)

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [
    -ShrunkCovariance(shrinkage=s, assume_centered=True)
    .fit(X_train).score(X_test)
    for s in shrinkages]

# getting the likelihood under the real model
Example no. 37
def main():
    '''
    Constructs a co-occurence network from gene expression data.

    Main entry point to code.
    '''

    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')
        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        df = df.head(PRUNE_GENES)

        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        lwe = LedoitWolf().fit(df.transpose())
        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]

        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor), statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']

        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)

    print("making 1-lfdr vs. pcor plot")
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o', markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('lfdr')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()
Example no. 38
# Remove data not analysed
mask_block = block == block  # start from an all-True mask
for x in range(label.shape[0]):
    if label[x, 2] != label[x - 1, 2]:
        mask_block[x] = False
    elif label[x, 2] != label[x - 2, 2]:
        mask_block[x] = False
c_des_out = np.logical_not(label[:, 2] == b'des')
tmp_out = np.logical_and(c_des_out, mask_block)
c_rest_out = np.logical_not(label[:, 0] == b'rest')
cond_out = np.logical_and(tmp_out, c_rest_out)
y = label[cond_out, 2]
labels = np.unique(y)
# Prepare correlation
estimator = LedoitWolf()
scaler = StandardScaler()
# Create np array
result_matrix = np.empty([len(names), motor_region.shape[0], labels.shape[0], labels.shape[0]])

# Analysis for each subject
for i, n in enumerate(sorted(names)):
    roi_name = fold_g + 'mni4060/asymroi_' + smt + '_' + n + '.npz'
    roi = np.load(roi_name)['roi'][cond_out]
    roi = roi[:, motor_region - 1]
    for j in range(motor_region.shape[0]):
        roi_j = roi[:, j]
        roi_mat = np.zeros(((y == b'imp').sum(), len(labels)))
        for z, lab in enumerate(sorted(labels)):
            roi_mat[:, z] = roi_j[y == lab]
        roi_sc = scaler.fit_transform(roi_mat)
Example no. 39
  # stack a random subset of image patches, 125K
  X_unlab_patches = []
  random.seed(42)
  print("Gathering examples...")
  # Use subsample of 200K for k-means and covariance estimates
  for i in random.sample(range(0, unlab_X.shape[2]), 200000):
    patches = view_as_windows(unlab_X[:, :, i], (w, w), step=s)
    re_shaped = numpy.reshape(patches, (patches.shape[0]*patches.shape[0], w * w))
    # normalize the patches, per sample
    re_shaped = preprocessing.scale(re_shaped, axis=1)
    X_unlab_patches.append(re_shaped)
  X_unlab_patches = numpy.vstack(X_unlab_patches)

  # build whitening transform matrix
  print "Fitting ZCA Whitening Transform..."
  cov = LedoitWolf()
  cov.fit(X_unlab_patches)  # fit covariance estimate
  D, U = numpy.linalg.eigh(cov.covariance_)
  V = numpy.sqrt(numpy.linalg.inv(numpy.diag(D + zca_eps)))
  Wh = numpy.dot(numpy.dot(U, V), U.T)
  mu = numpy.mean(X_unlab_patches, axis=0)
  X_unlab_patches = numpy.dot(X_unlab_patches-mu, Wh)

  # run k-means on unlabelled data
  print "Starting k-means..."
  clustr = sklearn.cluster.MiniBatchKMeans(n_clusters=n_clust,
                                           compute_labels=False,
                                           batch_size=300)
  k_means = clustr.fit(X_unlab_patches)

Example no. 40
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    # (assume_centered is a constructor parameter on current scikit-learn,
    # not a fit()/score() argument)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X)
    assert_almost_equal(lw.score(X), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)
Example no. 41
    def threshold_from_simulations(self, X, precision=2000, verbose=False,
                                   n_jobs=-1):
        """
        """
        import multiprocessing as mp
        if n_jobs < 1:
            n_jobs = mp.cpu_count()
        n_samples, n_features = X.shape
        n = n_samples
        p = n_features
        h = self.support_.sum()
        lw = LedoitWolf()
        ref_covariance = lw.fit(X[self.support_]).covariance_
        c = sp.stats.chi2(p + 2).cdf(
            sp.stats.chi2(p).ppf(float(h) / n)) / (float(h) / n)
        sigma_root = np.linalg.cholesky(ref_covariance / c)
        all_h = []

        # inliers distribution
        dist_in = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i / 4) == 0):
                print "\t", 50 * i / float(max_i), "%"
            #sigma_root = np.diag(np.sqrt(eigenvalues))
            #sigma_root = np.eye(n_features)
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(
                X1)
            X2 = X1 - clf.location_
            dist_in = np.concatenate(
                (dist_in, clf.decision_function(
                        X2[clf.support_], raw_values=True)))
            all_h.append(clf.h)

        # outliers distribution
        dist_out = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(n_samples - self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i / 4) == 0):
                print "\t", 50 * (1. + i / float(max_i)), "%"
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(X1)
            X2 = X1 - clf.location_
            dist_out = np.concatenate(
                (dist_out, clf.decision_function(
                        X2[~clf.support_], raw_values=True)))
            all_h.append(clf.h)
        self.dist_in = np.sort(dist_in)
        self.dist_out = np.sort(dist_out)
        self.h_mean = np.mean(all_h)

        return self.dist_out
Example no. 42
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
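As a quick illustration of what these assertions exercise, a minimal sketch on synthetic data (both the LedoitWolf estimator class and the ledoit_wolf function come from sklearn.covariance) checking that the two public interfaces agree:

import numpy as np
from sklearn.covariance import LedoitWolf, ledoit_wolf

X_demo = np.random.RandomState(0).randn(40, 5)
X_demo -= X_demo.mean(axis=0)

lw_demo = LedoitWolf(assume_centered=True).fit(X_demo)
cov_func, shrinkage_func = ledoit_wolf(X_demo, assume_centered=True)

# Both interfaces return the same shrunk covariance and shrinkage intensity.
assert np.allclose(lw_demo.covariance_, cov_func)
assert np.isclose(lw_demo.shrinkage_, shrinkage_func)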
Example n. 43
0
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    with warnings.catch_warnings(record=True):
        lw.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Example n. 44
0
# Compute the negative log-likelihood under the ground-truth model, which we
# would not have access to in real settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
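To make the comparison concrete, a short sketch (reusing the cv, lw and oa objects fitted above) that prints the three competing shrinkage values side by side:

# Shrinkage selected by cross-validation versus the closed-form estimates.
print("CV-selected shrinkage: %.4f" % cv.best_estimator_.shrinkage)
print("Ledoit-Wolf shrinkage: %.4f" % lw.shrinkage_)
print("OAS shrinkage:         %.4f" % oa.shrinkage_)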
Example n. 45
0
def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49
    n_samples = 200

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    random_state = check_random_state(0)
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        signal = random_state.randn(n_samples, n_features)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    # compare by concrete type: sklearn estimators do not
                    # implement __eq__, so `== EmpiricalCovariance()` is never True
                    if type(cov_estimator) is EmpiricalCovariance:
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))
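For reference, a minimal usage sketch of the estimator exercised by this test, assuming nilearn is available (one regularized partial-correlation matrix is returned per subject):

import numpy as np
from sklearn.covariance import LedoitWolf
from nilearn.connectome import ConnectivityMeasure

rng = np.random.RandomState(0)
demo_signals = [rng.randn(200, 10) for _ in range(5)]

conn = ConnectivityMeasure(kind='partial correlation',
                           cov_estimator=LedoitWolf())
matrices = conn.fit_transform(demo_signals)
print(matrices.shape)  # (5, 10, 10)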
Example n. 46
0
def shrink(X):
    lw = LedoitWolf(store_precision=False, assume_centered=False)
    lw.fit(X)
    return lw.covariance_
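A quick sanity check of this helper on synthetic data:

import numpy as np

X_demo = np.random.RandomState(0).randn(100, 6)
cov = shrink(X_demo)
print(cov.shape)                 # (6, 6)
print(np.allclose(cov, cov.T))   # True: the shrunk covariance is symmetric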
Example n. 47
0
class DCS_kd(BaseEstimator):

    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = int(k)  # projection dimension; must be an int for the reshape calls below
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' % covariance_estimator)

        self.x0 = None
        self.x1 = None

    def fit(self, x, y):
        self.x0 = x[y == min(y)]
        self.x1 = x[y == max(y)]

    def __str__(self):
        return 'Analytical Cauchy-Schwarz Divergence in {}-d'.format(self.k)

    def value(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ipx0 = self._ipx(self.x0, self.x0, v)
        ipx1 = self._ipx(self.x1, self.x1, v)
        ipx2 = self._ipx(self.x0, self.x1, v)

        return np.log(ipx0) + np.log(ipx1) - 2 * np.log(ipx2)

    def derivative(self, v):
        # We need matrix, not vector
        v = v.reshape(-1, self.k)

        ret = (self._d_ipx(self.x0, self.x0, v) / self._ipx(self.x0, self.x0, v)
               + self._d_ipx(self.x1, self.x1, v) / self._ipx(self.x1, self.x1, v)
               - 2 * self._d_ipx(self.x0, self.x1, v) / self._ipx(self.x0, self.x1, v))

        return ret.reshape(-1)

    def _H(self, X0, X1):
        n = (4.0 / (self.k + 2)) ** (2.0 / (self.k + 4))
        p = (-2.0 / (self.k + 4))
        return n * (X0.shape[0] ** p * self.cov.fit(X0).covariance_ + X1.shape[0] ** p * self.cov.fit(X1).covariance_)

    def _f1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        # return 1.0 / np.sqrt(la.det(vHv))
        return 1.0 / (X0.shape[0] * X1.shape[0] * np.sqrt(la.det(vHv)) * (2 * np.pi) ** (self.k / 2))

    def _g1(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        return - self._f1(X0, X1, v) * Hxy.dot(v).dot(la.inv(vHv))

    def _f2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        ret = 0.0
        for i in range(X0.shape[0]):
            ret += np.exp(-0.5 * ((vx0c[i] - vx1c) * (vx0[i] - vx1)).sum(axis=1)).sum()
        return ret

    def _g2(self, X0, X1, v):
        Hxy = self.gamma * self.gamma * self._H(X0, X1)
        vHv = v.T.dot(Hxy).dot(v)
        vHv_inv = la.inv(vHv)  # k x k

        vx0 = X0.dot(v)
        vx1 = X1.dot(v)
        vx0c = vx0.dot(vHv_inv)
        vx1c = vx1.dot(vHv_inv)

        eye = np.eye(v.shape[0])
        right_expr = (eye - Hxy.dot(v).dot(vHv_inv).dot(v.T))  # d x d

        d = v.shape[0]
        k = int(self.k)
        ret = 0.0
        for i in range(X0.shape[0]):
            f2_vals = np.exp(-0.5 * ((vx0c[i] - vx1c) * (vx0[i] - vx1)).sum(axis=1)).reshape(-1, 1)
            ws = (X0[i] - X1).reshape(X1.shape[0], d, 1)
            vxdiffs = (- f2_vals * (vx0[i] - vx1)).reshape(X1.shape[0], 1, k)
            ret += np.tensordot(ws, vxdiffs, ([0, 2], [0, 1]))

        return right_expr.dot(ret).dot(vHv_inv)

    def _ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._f2(X0, X1, v)

    def _d_ipx(self, X0, X1, v):
        return self._f1(X0, X1, v) * self._g2(X0, X1, v) + self._f2(X0, X1, v) * self._g1(X0, X1, v)
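A minimal usage sketch for this class (assuming la above refers to numpy.linalg): fit on two labeled samples, then evaluate the divergence and its gradient at a random flattened projection matrix:

import numpy as np

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.randn(50, 5), rng.randn(50, 5) + 1.0])
y_demo = np.array([0] * 50 + [1] * 50)

dcs = DCS_kd(k=2)                # project 5-d data onto 2 dimensions
dcs.fit(X_demo, y_demo)

v = rng.randn(5 * 2)             # flattened 5 x 2 projection matrix
print(dcs.value(v))              # Cauchy-Schwarz divergence estimate
print(dcs.derivative(v).shape)   # (10,), same layout as v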
Example n. 48
0
r = 0.1
real_cov = toeplitz(r**np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
Example n. 49
0
def plot_psds(psd_file, data_dir='/auto/tdrive/mschachter/data'):

    # read PairwiseCF file
    pcf_file = os.path.join(data_dir, 'aggregate', 'pairwise_cf.h5')
    pcf = AggregatePairwiseCF.load(pcf_file)
    # pcf.zscore_within_site()

    g = pcf.df.groupby(['bird', 'block', 'segment', 'electrode'])
    nsamps_electrodes = len(g)

    i = pcf.df.cell_index != -1
    g = pcf.df[i].groupby(['bird', 'block', 'segment', 'electrode', 'cell_index'])
    nsamps_cells = len(g)

    print('# of electrodes: %d' % nsamps_electrodes)
    print('# of cells: %d' % nsamps_cells)
    print('# of lfp samples: %d' % (pcf.lfp_psds.shape[0]))
    print('# of spike psd samples: %d' % (pcf.spike_psds.shape[0]))

    # compute the LFP mean and std
    lfp_psds = deepcopy(pcf.lfp_psds)
    print('lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99)))
    log_transform(lfp_psds)
    print('lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99)))
    nz = lfp_psds.sum(axis=1) > 0
    lfp_psds = lfp_psds[nz, :]
    lfp_psd_mean = lfp_psds.mean(axis=0)
    lfp_psd_std = lfp_psds.std(axis=0, ddof=1)
    nsamps_lfp = lfp_psds.shape[0]

    # get the spike rate
    spike_rate = pcf.df.spike_rate.values
    # plt.figure()
    # plt.hist(spike_rate, bins=20, color='g', alpha=0.7)
    # plt.title('Spike Rate Histogram, q1=%0.3f, q5=%0.3f, q10=%0.3f, q50=%0.3f, q99=%0.3f' %
    #           (np.percentile(spike_rate, 1), np.percentile(spike_rate, 5), np.percentile(spike_rate, 10),
    #           np.percentile(spike_rate, 50), np.percentile(spike_rate, 99)))
    # plt.show()

    # compute the covariance
    lfp_psd_z = deepcopy(lfp_psds)
    lfp_psd_z -= lfp_psd_mean
    lfp_psd_z /= lfp_psd_std
    lfp_and_spike_cov_est = LedoitWolf()
    lfp_and_spike_cov_est.fit(lfp_psd_z)
    lfp_and_spike_cov = lfp_and_spike_cov_est.covariance_

    """
    # read CRCNS file
    cell_data = dict()
    hf = h5py.File(psd_file, 'r')
    cnames = hf.attrs['col_names']
    for c in cnames:
        cell_data[c] = np.array(hf[c])
    crcns_psds = np.array(hf['psds'])
    freqs = hf.attrs['freqs']
    hf.close()

    cell_df = pd.DataFrame(cell_data)
    print 'regions=',cell_df.superregion.unique()

    name_map = {'brainstem':'MLd', 'thalamus':'OV', 'cortex':'Field L+CM'}
    """

    # resample the lfp mean and std
    freq_rs = np.linspace(pcf.freqs.min(), pcf.freqs.max(), 1000)
    
    lfp_mean_cs = interp1d(pcf.freqs, lfp_psd_mean, kind='cubic')
    lfp_mean_rs = lfp_mean_cs(freq_rs)
    
    lfp_std_cs = interp1d(pcf.freqs, lfp_psd_std, kind='cubic')
    lfp_std_rs = lfp_std_cs(freq_rs)

    # concatenate the lfp psd and log spike rate
    lfp_psd_and_spike_rate = list()
    for k,(li,si) in enumerate(zip(pcf.df['lfp_index'], pcf.df['spike_index'])):
        lpsd = pcf.lfp_psds[li, :]
        srate,sstd = pcf.spike_rates[si, :]
        if srate > 0:
            lfp_psd_and_spike_rate.append(np.hstack([lpsd, np.log(srate)]))
    lfp_psd_and_spike_rate = np.array(lfp_psd_and_spike_rate)

    nfreqs = len(pcf.freqs)
    lfp_rate_cc = np.zeros([nfreqs])
    for k in range(nfreqs):
        lfp_rate_cc[k] = np.corrcoef(lfp_psd_and_spike_rate[:, k], lfp_psd_and_spike_rate[:, -1])[0, 1]

    fig = plt.figure(figsize=(24, 12))
    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.30, hspace=0.30)

    nrows = 2
    ncols = 100
    gs = plt.GridSpec(nrows, ncols)

    ax = plt.subplot(gs[0, :35])
    plt.errorbar(freq_rs, lfp_mean_rs, yerr=lfp_std_rs, c='k', linewidth=9.0, elinewidth=3.0,
                 ecolor='#D8D8D8', alpha=0.5, capthick=0.)
    plt.axis('tight')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Power (dB)')
    # plt.ylim(0, 1)
    plt.title('Mean LFP PSD')

    ax = plt.subplot(gs[1, :35])
    plt.plot(pcf.freqs, lfp_rate_cc, '-', c=COLOR_BLUE_LFP, linewidth=9.0, alpha=0.7)
    plt.axhline(0, c='k')
    plt.axis('tight')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Correlation Coefficient')
    plt.ylim(-0.05, 0.25)
    plt.title('LFP Power vs log Spike Rate')

    """
    fi = freqs < 200
    ax = plt.subplot(gs[1, :35])
    clrs = ['k', '#d60036', COLOR_YELLOW_SPIKE]
    alphas = [0.8, 0.8, 0.6]
    for k,reg in enumerate(['brainstem', 'thalamus', 'cortex']):

        i = cell_df.superregion == reg
        indices = cell_df['index'][i].values
        psds = crcns_psds[indices, :]
        log_psds = deepcopy(psds)
        log_transform(log_psds)

        # compute the mean and sd of the power spectra
        psd_mean = log_psds.mean(axis=0)
        psd_std = log_psds.std(axis=0, ddof=1)
        psd_cv = psd_std / psd_mean

        # plot the mean power spectrum on the left
        plt.plot(freqs[fi], psd_mean[fi], c=clrs[k], linewidth=9.0, alpha=alphas[k])
        plt.ylabel('Power (dB)')
        plt.xlabel('Frequency (Hz)')
        plt.axis('tight')
        plt.ylim(0, 1.0)
    plt.legend(['MLd', 'OV', 'Field L+CM'], fontsize='x-small', loc='upper right')
    plt.title('Mean PSTH PSDs (CRCNS Data)')
    """

    ax = plt.subplot(gs[:, 40:])
    plt.imshow(lfp_and_spike_cov, aspect='auto', interpolation='nearest', origin='lower', cmap=magma, vmin=0, vmax=1)
    plt.colorbar(label='Correlation Coefficient')
    xy = np.arange(len(pcf.freqs))
    lbls = ['%d' % f for f in pcf.freqs]
    plt.xticks(xy, lbls, rotation=0)
    plt.yticks(xy, lbls)
    plt.axhline(nfreqs-0.5, c='w')
    plt.axvline(nfreqs-0.5, c='w')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Frequency (Hz)')
    plt.title('LFP PSD Correlation Matrix')

    fname = os.path.join(get_this_dir(), 'crcns_data.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()
Example n. 50
0
def lda_train_scaled(fv, shrink=False):
    """Train the LDA classifier.

    Parameters
    ----------
    fv : ``Data`` object
        the feature vector must have 2 dimensional data, the first
        dimension being the class axis. The unique class labels must be
        0 and 1 otherwise a ``ValueError`` will be raised.
    shrink : Boolean, optional
        use shrinkage

    Returns
    -------
    w : 1d array
    b : float

    Raises
    ------
    ValueError : if the class labels are not exactly 0s and 1s

    Examples
    --------

    >>> clf = lda_train_scaled(fv_train, shrink=True)
    >>> out = lda_apply(fv_test, clf)

    See Also
    --------
    lda_apply

    """
    assert shrink is True
    x = fv.data
    y = fv.axes[0]
    if len(np.unique(y)) != 2:
        raise ValueError('Should only have two unique class labels, instead got'
            ': {labels}'.format(labels=np.unique(y)))
    # Use sorted labels
    labels = np.sort(np.unique(y))
    mu1 = np.mean(x[y == labels[0]], axis=0)
    mu2 = np.mean(x[y == labels[1]], axis=0)
    # x' = x - m
    m = np.empty(x.shape)
    m[y == labels[0]] = mu1
    m[y == labels[1]] = mu2
    x2 = x - m
    # w = cov(x)^-1(mu2 - mu1)
    if shrink:
        estimator = LW()
        covm = estimator.fit(x2).covariance_
    else:
        covm = np.cov(x2.T)
    w = np.dot(np.linalg.pinv(covm), (mu2 - mu1))

    #  From matlab bbci toolbox:
    # https://github.com/bbci/bbci_public/blob/fe6caeb549fdc864a5accf76ce71dd2a926ff12b/classification/train_RLDAshrink.m#L133-L134
    #C.w= C.w/(C.w'*diff(C_mean, 1, 2))*2;
    #C.b= -C.w' * mean(C_mean,2);
    w = (w / np.dot(w.T, (mu2 - mu1))) * 2
    b = np.dot(-w.T, np.mean((mu1, mu2), axis=0))
    assert not np.any(np.isnan(w))
    assert not np.isnan(b)
    return w, b
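The returned pair (w, b) defines a linear decision function; a minimal sketch of the lda_apply counterpart mentioned in the docstring (a hypothetical helper, not part of the snippet above):

import numpy as np

def lda_apply_sketch(X, w, b):
    # Project each feature vector onto w and add the bias; the sign of the
    # output gives the predicted class (negative -> label 0, positive -> 1).
    return np.dot(X, w) + b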
Example n. 51
0
time_series = masker.fit_transform(func_filename,
                                   confounds=[confound_filename])

##########################################################################
# Display time series
import matplotlib.pyplot as plt
for time_serie, label in zip(time_series.T, labels):
    plt.plot(time_serie, label=label)

plt.title('Default Mode Network Time Series')
plt.xlabel('Scan number')
plt.ylabel('Normalized signal')
plt.legend()
plt.tight_layout()


##########################################################################
# Compute precision matrices
from sklearn.covariance import LedoitWolf
cve = LedoitWolf()
cve.fit(time_series)


##########################################################################
# Display connectome
from nilearn import plotting

plotting.plot_connectome(cve.precision_, dmn_coords,
                         title="Default Mode Network Connectivity")
plotting.show()
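The precision matrix plotted above can also be rescaled into partial correlations, which are often easier to interpret; a small sketch using the fitted estimator:

import numpy as np

prec = cve.precision_
d = np.sqrt(np.diag(prec))
# Partial correlation: -prec[i, j] / sqrt(prec[i, i] * prec[j, j]),
# with ones on the diagonal.
partial_corr = -prec / np.outer(d, d)
np.fill_diagonal(partial_corr, 1.0)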
Example n. 52
0
def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        n_samples = 200 + k
        signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
                                        length=n_samples, same_variance=False)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["covariance", "correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    # compare by concrete type: sklearn estimators do not
                    # implement __eq__, so `== EmpiricalCovariance()` is never True
                    if type(cov_estimator) is EmpiricalCovariance:
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))

    # Check the mean_
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        conn_measure.fit_transform(signals)
        assert_equal((conn_measure.mean_).shape, (n_features, n_features))
        if kind != 'tangent':
            assert_array_almost_equal(
                conn_measure.mean_,
                np.mean(conn_measure.transform(signals), axis=0))

    # Check that the mean isn't modified in transform
    conn_measure = ConnectivityMeasure(kind='covariance')
    conn_measure.fit(signals[:1])
    mean = conn_measure.mean_
    conn_measure.transform(signals[1:])
    assert_array_equal(mean, conn_measure.mean_)

    # Check vectorization option
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(vectorized_connectivities,
                                  sym_matrix_to_vec(connectivities))

    # Check not fitted error
    assert_raises_regex(
        ValueError, 'has not been fitted. ',
        ConnectivityMeasure().inverse_transform,
        vectorized_connectivities)

    # Check inverse transformation
    kinds.remove('tangent')
    for kind in kinds:
        # without vectorization: input matrices are returned with no change
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(connectivities), connectivities)

        # with vectorization: input vectors are reshaped into matrices
        # if diagonal has not been discarded
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    # with vectorization if diagonal has been discarded
    for kind in ['correlation', 'partial correlation']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(
            conn_measure.inverse_transform(vectorized_connectivities),
            connectivities)

    for kind in ['covariance', 'precision']:
        connectivities = ConnectivityMeasure(kind=kind).fit_transform(signals)
        conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                           discard_diagonal=True)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        diagonal = np.array([np.diagonal(conn) / sqrt(2) for conn in
                             connectivities])
        inverse_transformed = conn_measure.inverse_transform(
            vectorized_connectivities, diagonal=diagonal)
        assert_array_almost_equal(inverse_transformed, connectivities)
        assert_raises_regex(ValueError,
                            'can not reconstruct connectivity matrices',
                            conn_measure.inverse_transform,
                            vectorized_connectivities)

    # for 'tangent' kind, covariance matrices are reconstructed
    # without vectorization
    tangent_measure = ConnectivityMeasure(kind='tangent')
    displacements = tangent_measure.fit_transform(signals)
    covariances = ConnectivityMeasure(kind='covariance').fit_transform(
        signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(displacements), covariances)

    # with vectorization
    # when diagonal has not been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    assert_array_almost_equal(
        tangent_measure.inverse_transform(vectorized_displacements),
        covariances)

    # when diagonal has been discarded
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True,
                                          discard_diagonal=True)
    vectorized_displacements = tangent_measure.fit_transform(signals)
    diagonal = np.array([np.diagonal(matrix) / sqrt(2) for matrix in
                         displacements])
    inverse_transformed = tangent_measure.inverse_transform(
        vectorized_displacements, diagonal=diagonal)
    assert_array_almost_equal(inverse_transformed, covariances)
    assert_raises_regex(ValueError,
                        'can not reconstruct connectivity matrices',
                        tangent_measure.inverse_transform,
                        vectorized_displacements)
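For reference, the vectorization helpers exercised above round-trip a symmetric matrix, scaling off-diagonal terms by sqrt(2) so the vector norm matches the Frobenius norm; a minimal sketch assuming nilearn's helpers:

import numpy as np
from nilearn.connectome import sym_matrix_to_vec, vec_to_sym_matrix

a = np.random.RandomState(0).randn(4, 4)
sym = (a + a.T) / 2.0

vec = sym_matrix_to_vec(sym)      # lower-triangular part, off-diagonal * sqrt(2)
back = vec_to_sym_matrix(vec)
print(np.allclose(sym, back))     # True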
Example n. 53
0
try:
    import sklearn
    has_sklearn = True
except ImportError:
    has_sklearn = False
    print('sklearn not available')


def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr

if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MinCovDet

    lw = LedoitWolf(store_precision=False, assume_centered=False)
    lw.fit(rr)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False, assume_centered=False)
    oas.fit(rr)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MinCovDet()  # minimum covariance determinant (robust estimator)
    mcd.fit(rr)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
Example n. 54
0
r = 0.1
real_cov = toeplitz(r ** np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')