Example 1
def _cov(X, shrinkage=None):
    """Estimate covariance matrix (using optional shrinkage).
    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Input data.
    shrinkage : string or float, optional
        Shrinkage parameter, possible values:
          - None or 'empirical': no shrinkage (default).
          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
          - float between 0 and 1: fixed shrinkage parameter.
    Returns
    -------
    s : array, shape (n_features, n_features)
        Estimated covariance matrix.
    """
    shrinkage = "empirical" if shrinkage is None else shrinkage
    if isinstance(shrinkage, str):
        if shrinkage == 'auto':
            sc = StandardScaler()  # standardize features
            X = sc.fit_transform(X)
            s = ledoit_wolf(X)[0]
            # rescale
            s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
        elif shrinkage == 'empirical':
            s = empirical_covariance(X)
        else:
            raise ValueError('unknown shrinkage parameter')
    elif isinstance(shrinkage, (int, float)):
        if shrinkage < 0 or shrinkage > 1:
            raise ValueError('shrinkage parameter must be between 0 and 1')
        s = shrunk_covariance(empirical_covariance(X), shrinkage)
    else:
        raise TypeError('shrinkage must be a string or a float')
    return s
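A minimal usage sketch of the helper above, assuming the scikit-learn imports its body relies on (these imports are an assumption about the surrounding module, not shown in the snippet):

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import (empirical_covariance, ledoit_wolf,
                                shrunk_covariance)

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
s_emp = _cov(X)                       # no shrinkage
s_auto = _cov(X, shrinkage='auto')    # Ledoit-Wolf on standardized features
s_fixed = _cov(X, shrinkage=0.1)      # fixed shrinkage coefficient
print(s_emp.shape, s_auto.shape, s_fixed.shape)  # (4, 4) each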
Example 2
def test_covariance():
    """Tests Covariance module on a simple dataset.

    """
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), norm='spectral'), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), norm='frobenius'), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), scaling=False), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), squared=False), 0)
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    assert(np.amax(mahal_dist) < 250)
    assert(np.amin(mahal_dist) > 50)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)
Example 4
def test_shrunk_covariance():
    """Tests ShrunkCovariance module on a simple dataset.

    """
    # compare shrunk covariance obtained from data and from MLE estimate
    cov = ShrunkCovariance(shrinkage=0.5)
    cov.fit(X)
    assert_array_almost_equal(
        shrunk_covariance(empirical_covariance(X), shrinkage=0.5),
        cov.covariance_, 4)

    # same test with shrinkage not provided
    cov = ShrunkCovariance()
    cov.fit(X)
    assert_array_almost_equal(
        shrunk_covariance(empirical_covariance(X)), cov.covariance_, 4)

    # same test with shrinkage = 0 (<==> empirical_covariance)
    cov = ShrunkCovariance(shrinkage=0.)
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = ShrunkCovariance(shrinkage=0.3)
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    cov = ShrunkCovariance(shrinkage=0.5, store_precision=False)
    cov.fit(X)
    assert(cov.precision_ is None)
Example 5
    def _nonrobust_covariance(self, data, assume_centered=False):
        """Non-robust estimation of the covariance to be used within MCD.

        Parameters
        ----------
        data: array_like, shape (n_samples, n_features)
          Data for which to compute the non-robust covariance matrix.
        assume_centered: Boolean
          Whether or not the observations should be considered as centered.

        Returns
        -------
        nonrobust_covariance: array_like, shape (n_features, n_features)
          The non-robust covariance of the data.

        """
        try:
            cov, prec = graph_lasso(
                empirical_covariance(data, assume_centered=assume_centered),
                self.shrinkage)
        except Exception:
            print(" > Exception!")
            emp_cov = empirical_covariance(
                data, assume_centered=assume_centered)
            emp_cov.flat[::data.shape[1] + 1] += 1e-06
            cov, prec = graph_lasso(emp_cov, self.shrinkage)
        return cov
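The `emp_cov.flat[::data.shape[1] + 1] += 1e-06` fallback above regularizes only the diagonal; a small standalone sketch of that stride trick:

import numpy as np

a = np.zeros((3, 3))
a.flat[::3 + 1] += 1e-6  # flat indices 0, 4, 8 are exactly the diagonal
assert np.allclose(a, 1e-6 * np.eye(3))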
Example 6
def test_shrunk_covariance():
    # Tests ShrunkCovariance module on a simple dataset.
    # compare shrunk covariance obtained from data and from MLE estimate
    cov = ShrunkCovariance(shrinkage=0.5)
    cov.fit(X)
    assert_array_almost_equal(
        shrunk_covariance(empirical_covariance(X), shrinkage=0.5),
        cov.covariance_, 4)

    # same test with shrinkage not provided
    cov = ShrunkCovariance()
    cov.fit(X)
    assert_array_almost_equal(shrunk_covariance(empirical_covariance(X)),
                              cov.covariance_, 4)

    # same test with shrinkage = 0 (<==> empirical_covariance)
    cov = ShrunkCovariance(shrinkage=0.)
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = ShrunkCovariance(shrinkage=0.3)
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    cov = ShrunkCovariance(shrinkage=0.5, store_precision=False)
    cov.fit(X)
    assert (cov.precision_ is None)
Example 7
    def _nonrobust_covariance(self, data, assume_centered=False):
        """Non-robust estimation of the covariance to be used within MCD.

        Parameters
        ----------
        data: array_like, shape (n_samples, n_features)
          Data for which to compute the non-robust covariance matrix.
        assume_centered: Boolean
          Whether or not the observations should be considered as centered.

        Returns
        -------
        nonrobust_covariance: array_like, shape (n_features, n_features)
          The non-robust covariance of the data.

        """
        try:
            cov, prec = graph_lasso(
                empirical_covariance(data, assume_centered=assume_centered),
                self.shrinkage)
        except Exception:
            print(" > Exception!")
            emp_cov = empirical_covariance(data,
                                           assume_centered=assume_centered)
            emp_cov.flat[::data.shape[1] + 1] += 1e-06
            cov, prec = graph_lasso(emp_cov, self.shrinkage)
        return cov
Example 8
def flgl_path(X_train, links=None, etas=[0.1], mus=[0.1],
              X_test=None, tol=1e-3, max_iter=200,
              update_rho=False, verbose=0, score='ebic',
              random_state=None):
    
    score_funcs = {'likelihood': log_likelihood, 'bic': BIC}
    if X_test is not None:
        # the EBIC variants need the test-set size, so only build them
        # when a test set is actually provided
        score_funcs['ebic'] = partial(EBIC, n=X_test.shape[0])
        score_funcs['ebicm'] = partial(EBIC_m, n=X_test.shape[0])
    try:
        score_func = score_funcs[score]
    except KeyError:
        warnings.warn("The score type passed is not available, "
                      "using log likelihood.")
        score_func = log_likelihood

    emp_cov = empirical_covariance(X_train)
    covariance_ = emp_cov.copy()

    covariances_ = list()
    precisions_ = list()
    hiddens_ = list()
    scores_ = list()
    
    if X_test is not None:
        test_emp_cov = empirical_covariance(X_test)

    for eta in etas:
        for mu in mus:
            try:
                # Capture the errors, and move on
                cov_, prec_, hid_,_ = two_layers_fixed_links_GL(
                    emp_cov, links, mu, eta, max_iter=max_iter, 
                    random_state=random_state, return_n_iter=False)
                covariances_.append(cov_)
                precisions_.append(prec_)
                hiddens_.append(hid_)
                
                if X_test is not None:
                    this_score = score_func(test_emp_cov, prec_)
            except FloatingPointError:
                this_score = -np.inf
                covariances_.append(np.nan)
                precisions_.append(np.nan)
                hiddens_.append(np.nan)
            if X_test is not None:
                if not np.isfinite(this_score):
                    this_score = -np.inf
                scores_.append(this_score)
            if verbose:
                if X_test is not None:
                    print('[graphical_lasso_path] eta: %.2e, mu: %.2e, score: %.2e'
                          % (eta, mu, this_score))
                else:
                    print('[graphical_lasso_path] eta: %.2e, mu: %.2e' % (eta, mu))
    if X_test is not None:
        return covariances_, precisions_, hiddens_, scores_
    return covariances_, precisions_, hiddens_
Example 9
def test_covariance():
    """Tests Covariance module on a simple dataset.

    """
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, squared=False), 0)
    assert_raises(NotImplementedError,
                  cov.error_norm, emp_cov, norm='foo')
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    print(np.amin(mahal_dist), np.amax(mahal_dist))
    assert(np.amin(mahal_dist) > 0)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    cov = EmpiricalCovariance()
    assert_warns(UserWarning, cov.fit, X_1sample)
    assert_array_almost_equal(cov.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_array_equal(cov.location_, np.zeros(X.shape[1]))
Example 10
def test_covariance():
    """Tests Covariance module on a simple dataset.

    """
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, squared=False), 0)
    assert_raises(NotImplementedError,
                  cov.error_norm, emp_cov, norm='foo')
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    print(np.amin(mahal_dist), np.amax(mahal_dist))
    assert(np.amin(mahal_dist) > 0)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test with one sample
    X_1sample = np.arange(5)
    cov = EmpiricalCovariance()
    with warnings.catch_warnings(record=True):
        cov.fit(X_1sample)

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_equal(cov.location_, np.zeros(X.shape[1]))
Example 11
def test_covariance():
    # Tests Covariance module on a simple dataset.
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0)
    with pytest.raises(NotImplementedError):
        cov.error_norm(emp_cov, norm='foo')
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    assert np.amin(mahal_dist) > 0

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test with one sample
    # Create X with 1 sample and 5 features
    X_1sample = np.arange(5).reshape(1, 5)
    cov = EmpiricalCovariance()
    warn_msg = (
        "Only one sample available. You may want to reshape your data array")
    with pytest.warns(UserWarning, match=warn_msg):
        cov.fit(X_1sample)

    assert_array_almost_equal(cov.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_array_equal(cov.location_, np.zeros(X.shape[1]))
Example 12
def LinRegErr(A, B, iterations=50):  # returns the errors of the slope and the intercept

    n = len(A)
    meanA = np.mean(A)
    meanB = np.mean(B)

    cont = 0

    Slps = []
    Ints = []
    Covs = []

    while cont < iterations:
        # draw a sample of size 'n' from the distribution estimated for the population
        Sample = mvn.rvs(mean=[meanA, meanB], cov=np.cov(A, B), size=n)
        popt, pcov = curve_fit(linf, Sample[:, 0], Sample[:, 1])  # fit the line
        Slope = popt[0]
        Intercept = popt[1]
        Sample_cov = empirical_covariance(Sample)[0, 1]
        Slps = np.append(Slps, Slope)
        Ints = np.append(Ints, Intercept)
        Covs = np.append(Covs, Sample_cov)

        print(cont)
        cont += 1

    return (np.std(Slps), np.std(Ints), np.mean(Covs))
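A hypothetical usage sketch of `LinRegErr`; `linf`, `mvn`, and `curve_fit` live elsewhere in the source file, so the definitions below are assumptions (with `linf` taken to be the usual two-parameter line):

import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import multivariate_normal as mvn
from sklearn.covariance import empirical_covariance

def linf(x, a, b):  # assumed form of the source's linear model
    return a * x + b

rng = np.random.RandomState(0)
A = rng.randn(100)
B = 2.0 * A + 0.1 * rng.randn(100)
slope_err, intercept_err, mean_cov = LinRegErr(A, B, iterations=10)
print(slope_err, intercept_err, mean_cov)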
Example 13
def get_cov(data):
    dat = data.training_data_all_ways + data.testing_data_all_ways
    num_ways = len(data.get_list_of_ways())
    m = {}
    i = 0
    for way in data.get_list_of_ways():
        m[way] = i
        i += 1
    mat = np.zeros((num_ways,num_ways))
    for elem in dat:
        ways = elem[1]
        for way in ways:
            mat[m[way],m[way]] = mat[m[way],m[way]] + 1
        for w1 in ways:
            for w2 in ways:
                if w1 == w2: continue
                mat[m[w1],m[w2]] = mat[m[w1],m[w2]] + 1
    print(mat)
    emp_cov = empirical_covariance(mat)
    print(emp_cov)
    corr = np.zeros((num_ways,num_ways))
    for i in range(num_ways):
        for j in range(num_ways):
            corr[i,j] = emp_cov[i,j]/(math.sqrt(emp_cov[i,i])*math.sqrt(emp_cov[j,j]))
    print(corr)
    sns.heatmap(corr, vmin=-1, vmax=1, square=True,
                xticklabels=list(m), yticklabels=list(m))
    plt.title("Covariance of WAYS frequencies")  # sns.plt was removed from seaborn; use matplotlib.pyplot
    plt.show()
Example 14
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):
    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
                      (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert (error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert (error_cov < tol_cov)
    assert (np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)
Example 15
    def fit(self, X, y=None):
        """Fit the GraphLasso model to X.

        Parameters
        ----------
        X : ndarray, shape (n_time, n_samples, n_features), or
                (n_samples, n_features, n_time)
            Data from which to compute the covariance estimate.
            If shape is (n_samples, n_features, n_time), then set
            `bypass_transpose = False`.
        y : (ignored)
        """
        if not self.bypass_transpose:
            X = X.transpose(2, 0, 1)  # put time as first dimension
        # Covariance does not make sense for a single feature
        # X = check_array(X, allow_nd=True, estimator=self)
        # if X.ndim != 3:
        #     raise ValueError("Found array with dim %d. %s expected <= 2."
        #                      % (X.ndim, self.__class__.__name__))
        X = np.array([check_array(x, ensure_min_features=2,
                      ensure_min_samples=2, estimator=self) for x in X])
        if self.assume_centered:
            self.location_ = np.zeros((X.shape[0], 1, X.shape[2]))
        else:
            self.location_ = X.mean(1).reshape(X.shape[0], 1, X.shape[2])
        self.emp_cov = np.array([empirical_covariance(
            x, assume_centered=self.assume_centered) for x in X])
        self.precision_, self.latent_, self.covariance_, self.n_iter_ = \
            latent_time_graph_lasso(
                self.emp_cov, alpha=self.alpha, tau=self.tau, rho=self.rho,
                beta=self.beta, eta=self.eta, mode=self.mode,
                tol=self.tol, rtol=self.rtol, psi=self.psi, phi=self.phi,
                max_iter=self.max_iter, verbose=self.verbose,
                return_n_iter=True, return_history=False)
        return self
Example 16
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
Example 17
def construct_motion_gaussian_models(char_dict):
    # fit a 2D gaussian model for each key
    # plot the mean and variance ellipse on the keyboard layout
    # img = mpimg.imread('keyboard_screen_shot.jpg')
    # imgplot = plt.imshow(img)
    # convert data to numpy
    char_list = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' '
    ]

    # create a gaussian model for every character

    model_dict = {}

    scaler = MinMaxScaler()

    for char in char_list:
        if char in char_dict.keys():

            # print(char)
            # print(np.array(char_dict[char]).T.shape)

            dim = np.array(char_dict[char]).shape
            X = np.array(char_dict[char])
            scaler.fit(X)
            # X_t = scaler.transform(X)
            X_t = X
            dim = X_t.shape
            mu = np.mean(X_t, axis=0)

            # print(np.array(char_dict[char].shape))
            if dim[0] > 1:
                # cannot use np.cov when there is only one data point
                # cov = np.cov(np.array(char_dict[char]).T)
                cov = empirical_covariance(X_t)
                # print(cov)
            else:
                cov = np.zeros((dim[1], dim[1]))
            # save the model parameter in a dictionary
            model_dict[char] = {}
            model_dict[char]['mean'] = mu

            # model_dict[char]['cov'] = np.diag(np.diag(cov))
            model_dict[char]['cov'] = cov

            sigma_det = np.linalg.det(cov)

            # the covariance matrix is positive semi-definite, so sigma_det
            # is >= 0; a zero (or numerically negative) determinant signals
            # degenerate data for this character
            if sigma_det <= 0:
                print(char + " : " + str(sigma_det))
                print(X_t.shape)
                print(cov)

            # print(cov.shape)
            # print("Generated plot for " + char)
    # plt.scatter(x, y)
    # plt.savefig(posture + '_training_data_pattern/' + posture + '_2D_Gauss.png', dpi=200)
    return model_dict
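Regarding the determinant check above: positive semi-definiteness only guarantees det >= 0, and degenerate (e.g. collinear) observations produce exactly zero. A minimal sketch:

import numpy as np
from sklearn.covariance import empirical_covariance

pts = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])  # collinear points
cov = empirical_covariance(pts)
print(np.linalg.det(cov))  # 0.0, up to floating point error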
Example 18
    def fit(self, X, y):
        """Fit the TimeGraphicalLasso model to X.

        Parameters
        ----------
        X : ndarray, shape = (n_samples * n_times, n_dimensions)
            Data matrix.
        y : ndarray, shape = (n_times,)
            Indicate the temporal belonging of each sample.

        """
        # Covariance does not make sense for a single feature
        X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float64, order="C", ensure_min_features=2, estimator=self)

        n_dimensions = X.shape[1]
        self.classes_, n_samples = np.unique(y, return_counts=True)
        n_times = self.classes_.size

        # n_samples = np.array([x.shape[0] for x in X])
        if self.assume_centered:
            self.location_ = np.zeros((n_times, n_dimensions))
        else:
            self.location_ = np.array([X[y == cl].mean(0) for cl in self.classes_])

        emp_cov = np.array(
            [empirical_covariance(X[y == cl], assume_centered=self.assume_centered) for cl in self.classes_]
        )

        return self._fit(emp_cov, n_samples)
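The per-class bookkeeping above relies on `np.unique` with `return_counts=True`; a quick illustration of what it yields:

import numpy as np

y = np.array([0, 0, 1, 1, 1, 2])
classes_, n_samples = np.unique(y, return_counts=True)
print(classes_)   # [0 1 2]
print(n_samples)  # [2 3 1]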
Example 19
    def score(self, X_test, y=None):
        """Computes the log-likelihood of a Gaussian data set with
        `self.covariance_` as an estimator of its covariance matrix.

        Parameters
        ----------
        X_test : lenght-2 list of array-like of shape (n_samples1, n_features1)
                 and (n_samples2, n_features2)
            Test data of which we compute the likelihood, where n_samples is
            the number of samples and n_features is the number of features.
            X_test is assumed to be drawn from the same distribution than
            the data used in fit (including centering). The number of features
            must correspond.

        y : not used, present for API consistence purpose.

        Returns
        -------
        res : float
            The likelihood of the data set with `self.covariance1_` and
            `self.covariance1_`as an estimator of its covariance matrix.

        """

        if self.covariance_ is None:
            raise ValueError("The estimator is not fitted on training data.")

        check_data_dimensions(X_test, layers=2)
        # compute empirical covariance of the test set
        test_cov = empirical_covariance(X_test - self.location_,
                                        assume_centered=True)

        res = log_likelihood(test_cov, self.get_precision())
        return res
Example 20
    def fit(self, X, y=None):
        """Fit the GraphLasso model to X.

        Parameters
        ----------
        X : array-like shape (n_samples, n_features) 
            Data from which to compute the covariance estimate.
        y : (ignored)
        """
        self.random_state = check_random_state(self.random_state)
        # check_data_dimensions(X, layers=2)
        X = check_array(X, ensure_min_features=2,
                        ensure_min_samples=2, estimator=self)

        self.X_train = X
        if self.assume_centered:
            self.location_ = np.zeros(X.shape[1])
        else:
            self.location_ = X.mean(0)

        emp_cov = empirical_covariance(
            X, assume_centered=self.assume_centered)
        self.precision_, self.hidden_, \
            self.observed_, self.emp_cov, \
            self.n_iter_ = two_layers_fixed_links_GL(
                emp_cov, self.L, eta=self.eta, mu=self.mu,
                rho=self.rho, tol=self.tol, rtol=self.rtol,
                max_iter=self.max_iter, verbose=self.verbose,
                return_n_iter=True, return_history=False,
                compute_objective=self.compute_objective)
        return self
Example 21
def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov,
                                    alpha=0.01,
                                    return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
Example 22
def test_graph_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (.01, .1):
        covs = dict()
        for method in ('cd', 'lars'):
            cov_, _, costs = graph_lasso(emp_cov, alpha=alpha, mode=method,
                                         return_costs=True)
            covs[method] = cov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease
            assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'])

    # Smoke test the estimator
    model = GraphLasso(alpha=.1).fit(X)
    assert_array_almost_equal(model.covariance_, covs['cd'])
Example 24
def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array(
        [
            [0.68112222, 0.0000000, 0.265820, 0.02464314],
            [0.00000000, 0.1887129, 0.000000, 0.00000000],
            [0.26582000, 0.0000000, 3.095503, 0.28697200],
            [0.02464314, 0.0000000, 0.286972, 0.57713289],
        ]
    )
    icov_R = np.array(
        [
            [1.5190747, 0.000000, -0.1304475, 0.0000000],
            [0.0000000, 5.299055, 0.0000000, 0.0000000],
            [-0.1304475, 0.000000, 0.3498624, -0.1683946],
            [0.0000000, 0.000000, -0.1683946, 1.8164353],
        ]
    )
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False, mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
Example 25
def get_moment(samples, name):
    if name == 'covariance':
        return empirical_covariance(samples)[0, 1]
    elif name == 'var':
        return np.square(np.std(samples, axis=0))
    else:
        raise ValueError("unknown name: {}".format(name))
Example 26
    def pca_it(self, spec, recipes, process_recipe):
        MB_SIZE = 10
        process_func = ProcessFunc(process_recipe, spec)
        output_director = MinibatchOutputDirector2(
            MB_SIZE,
            x_shape=(spec['target_channels'], spec['target_h'],
                     spec['target_w']),
            y_shape=(self.Y_SHAPE, ))

        iterator = create_standard_iterator(process_func,
                                            recipes,
                                            output_director,
                                            pool_size=6,
                                            buffer_size=40,
                                            chunk_size=MB_SIZE * 3)

        print('computing eigenvalues ...')
        X = np.concatenate(
            [batch['mb_x'][0, ...].reshape((3, -1)).T for batch in iterator])
        n = X.shape[0]
        limit = 125829120
        if n > limit:
            X = X[np.random.randint(n, size=limit), :]
        print(X.shape)
        cov = empirical_covariance(X)
        print(cov)
        evs, U = eigh(cov)
        print(evs)
        print(U)

        return evs, U
Example 27
    def fit(self, X, y=None):
        """Fits the GraphLasso model to X.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Data from which to compute the covariance estimate
        y : (ignored)

        """
        # Covariance does not make sense for a single feature
        X = check_array(X, ensure_min_features=2, ensure_min_samples=2,
                        estimator=self)
        if self.assume_centered:
            self.location_ = np.zeros(X.shape[1])
        else:
            self.location_ = X.mean(0)

        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)
        self.precision_, self.covariance_, self.n_iter_ = graph_lasso(
            emp_cov, alpha=self.alpha, tol=self.tol, rtol=self.rtol,
            max_iter=self.max_iter, over_relax=self.over_relax, rho=self.rho,
            verbose=self.verbose, return_n_iter=True, return_history=False,
            mode=self.mode, update_rho_options=self.update_rho_options,
            compute_objective=self.compute_objective)
        return self
Example 28
def friedman_results(data_grid, K, K_obs, ells, alpha):
    from rpy2.robjects.packages import importr
    glasso = importr('glasso').glasso

    tic = time.time()
    iters = []
    precisions = []
    for d in data_grid.transpose(2, 0, 1):
        emp_cov = empirical_covariance(d)
        out = glasso(emp_cov, alpha)
        iters.append(int(out[-1][0]))
        precisions.append(np.array(out[1]))
    tac = time.time()
    iterations = np.max(iters)
    precisions = np.array(precisions)
    F1score = utils.structure_error(K, precisions)['f1']
    MSE_observed = None
    MSE_precision = utils.error_norm(K, precisions, upper_triangular=True)
    MSE_latent = None
    mean_rank_error = None

    res = dict(n_dim_obs=K.shape[1],
               time=tac - tic,
               iterations=iterations,
               F1score=F1score,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               likelihood=likelihood_score(data_grid.transpose(2, 0, 1),
                                           precisions),
               note=None,
               estimator=None)

    return res
Example 29
def chandresekeran_results(data_grid, K, K_obs, ells, tau, alpha, **kwargs):
    emp_cov = np.array([
        empirical_covariance(x, assume_centered=True)
        for x in data_grid.transpose(2, 0, 1)
    ]).transpose(1, 2, 0)

    rho = 1. / np.sqrt(data_grid.shape[0])

    result = lvglasso(emp_cov, alpha, tau, rho)
    ma_output = Bunch(**result)

    R = np.array(ma_output.R).T
    S = np.array(ma_output.S).T
    L = np.array(ma_output.L).T

    ss = utils.structure_error(K, S)
    MSE_observed = utils.error_norm(K_obs, R)
    MSE_precision = utils.error_norm(K, S, upper_triangular=True)
    MSE_latent = utils.error_norm(ells, L)
    mean_rank_error = utils.error_rank(ells, L)

    res = dict(n_dim_obs=K.shape[1],
               time=ma_output.elapsed_time,
               iterations=np.max(ma_output.iter),
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               note=None,
               estimator=ma_output,
               likelihood=likelihood_score(data_grid.transpose(2, 0, 1), R),
               latent=L)

    res = dict(res, **ss)
    return res
Example 30
def GWishartFit(X, G, GWprior, mode='covsel'):
    """Fit G-Wishart distribution."""
    n_samples, n_dim = X.shape

    d0 = GWprior.d0
    S0 = GWprior.S0

    # check prior size violations
    if G.shape[0] != n_dim or G.shape[1] != n_dim:
        raise ValueError('G must be p-by-p, with p the dimension of X')
    if S0.shape[0] != n_dim or S0.shape[1] != n_dim:
        raise ValueError('GWprior.S0 must be p-by-p, with p the dimension of X')

    # compute posterior scatter matrix
    dn = n_samples + d0

    # X'X - but without assuming X to be centered
    emp_cov = empirical_covariance(X)
    S = n_samples * emp_cov
    C = (S + S0) / (dn - 2)

    if mode == 'covsel':
        precision = precision_selection(G, n_dim, C)
    else:
        # use graph_lasso
        # convert G to alpha
        alpha = np.zeros_like(G, dtype=float)
        alpha[~(G + G.T)] = np.inf
        precision = graphical_lasso(emp_cov=C, alpha=alpha)[0]

    return precision, S
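The posterior scatter step above uses the identity n * empirical_covariance(X) == Xc' Xc for mean-centered Xc (scikit-learn's empirical_covariance divides by n, not n - 1); a quick check:

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X = rng.randn(40, 3)
Xc = X - X.mean(axis=0)
assert np.allclose(40 * empirical_covariance(X), Xc.T @ Xc)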
Example 31
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert(error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert(error_cov < tol_cov)
    assert(np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)
Example 32
def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
Example 33
def save(X, Y, **kwargs):
    """
    Parameters
    ----------
    X : array-like, shape = [N, D]
        Training data, where N is the number of samples and
        D is the number of features.
    Y : array-like, shape = [N]
        Response variable, where n_samples is the number of samples


    Argument dictionary should contain:
    kwargs = {
        'd' : intrinsic dimension (int)
        'n_levelsets' : number of slices to use (int)
        'split_by' : 'dyadic' (dyadic decomposition) or 'stateq' (statistically equivalent blocks) (default: 'dyadic')
        'return_mat' : Boolean whether key SIR matrix should be returned (defaults
                    to False).
    }

    Returns
    -----------
    proj_vecs : array-like, shape = [n_features, d]
        Orthonormal system spanning the sufficient dimension subspace, where
        d refers to the intrinsic dimension.

    M : SAVE matrix, only if return_mat option is True
    """
    # Extract arguments from dictionary
    d = kwargs['d']
    n_levelsets = kwargs['n_levelsets']
    split_by = kwargs.get('split_by', 'dyadic')
    return_mat = kwargs.get('return_mat', False)
    N, D = X.shape
    # Standardize X
    Z, cov_all_sqrtinv = whiten_data(X)
    # Create partition
    labels = split(Y, n_levelsets, split_by)
    M = np.zeros((D, D))  # Container for the key SAVE matrix
    # Compute SAVE matrix
    empirical_probabilities = np.zeros(n_levelsets)
    for i in range(n_levelsets):
        empirical_probabilities[i] = float(len(
            np.where(labels == i)[0])) / float(N)
        if empirical_probabilities[i] == 0:
            continue
        cov_sub = empirical_covariance(
            Z[labels == i, :])  # covariance of the samples in this level set
        M += empirical_probabilities[i] * (np.eye(D) -
                                           cov_sub).dot(np.eye(D) - cov_sub)
    U, S, V = np.linalg.svd(M)
    # Apply inverse transformation
    vecs = cov_all_sqrtinv.dot(U[:, :d])
    proj_vecs, dummy = np.linalg.qr(vecs)
    if return_mat:
        return proj_vecs, M
    else:
        return proj_vecs
Example 34
def kfold_cv(X, K=10, isotonic=True):
    """K-fold cross-validated eigenvalues for LW nonlinear shrinkage"""
    S = empirical_covariance(X)
    lam, U = np.linalg.eigh(S)
    d = _nls_cv(X, S, K)
    if isotonic:
        d = isotonic_regression(d, increasing=True)
    return U @ np.diag(d) @ U.T
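For reference, the eigendecomposition round-trip used by `kfold_cv` is exact when the eigenvalues are left untouched (`_nls_cv` and `isotonic_regression` are helpers from the source project); a numpy-only sanity check:

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
S = empirical_covariance(X)
lam, U = np.linalg.eigh(S)
assert np.allclose(U @ np.diag(lam) @ U.T, S)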
Example 35
    def fit(self, X, alpha):
        self.alpha = alpha
        emp_cov = empirical_covariance(X)
        self.covariance_, self.precision_ = graph_lasso(emp_cov,
                                                        alpha=self.alpha,
                                                        tol=self.tol,
                                                        max_iter=self.max_iter)
        return self.covariance_, self.precision_
Example 36
def empirical_covariances(subjects, assume_centered=False, standardize=False):
    """Compute empirical covariances for several signals.

    Parameters
    ----------
    subjects : list of numpy.ndarray, shape for each (n_samples, n_features)
        input subjects. Each subject is a 2D array, whose columns contain
        signals. Sample number can vary from subject to subject, but all
        subjects must have the same number of features (i.e. of columns).

    assume_centered : bool, optional
        if True, assume that all input signals are centered. This slightly
        decreases computation time by avoiding useless computation.
        Default=False.

    standardize : bool, optional
        if True, set every signal variance to one before computing their
        covariance matrix (i.e. compute a correlation matrix).
        Default=False.

    Returns
    -------
    emp_covs : numpy.ndarray, shape : (feature number, feature number, subject number)
        empirical covariances.

    n_samples : numpy.ndarray, shape: (subject number,)
        number of samples for each subject. dtype is np.float64.

    """
    if not hasattr(subjects, "__iter__"):
        raise ValueError("'subjects' input argument must be an iterable. "
                         "You provided {0}".format(subjects.__class__))

    n_subjects = [s.shape[1] for s in subjects]
    if len(set(n_subjects)) > 1:
        raise ValueError("All subjects must have the same number of "
                         "features.\nYou provided: {0}".format(
                             str(n_subjects)))
    n_subjects = len(subjects)
    n_features = subjects[0].shape[1]

    # Do not force a dtype here: depending on the user's input, a conversion
    # from single precision to double may or may not be required.
    emp_covs = np.empty((n_features, n_features, n_subjects), order="F")
    for k, s in enumerate(subjects):
        if standardize:
            s = s / s.std(axis=0)  # copy on purpose
        M = empirical_covariance(s, assume_centered=assume_centered)

        # Force matrix symmetry, for numerical stability
        # of _group_sparse_covariance
        emp_covs[..., k] = M + M.T
    emp_covs /= 2

    n_samples = np.asarray([s.shape[0] for s in subjects], dtype=np.float64)

    return emp_covs, n_samples
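A short usage sketch of the helper above (it assumes `numpy as np` and scikit-learn's `empirical_covariance` are imported, as in the source module): two subjects with different sample counts but the same four features.

import numpy as np

rng = np.random.RandomState(0)
subjects = [rng.randn(30, 4), rng.randn(50, 4)]
emp_covs, n_samples = empirical_covariances(subjects)
print(emp_covs.shape)  # (4, 4, 2)
print(n_samples)       # [30. 50.]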
Example 37
def likelihood_score(X, precision_):
    # compute empirical covariance of the test set
    location_ = X.mean(1).reshape(X.shape[0], 1, X.shape[2])
    test_cov = np.array(
        [empirical_covariance(x, assume_centered=True) for x in X - location_])

    res = sum(log_likelihood(S, K) for S, K in zip(test_cov, precision_))

    return res
Example 38
def covariances():
    subject_to_means, subject_to_values = load_data(TRAINING_DATA_FILENAME, True)
    subject_to_covariance = {}
    full_matrix = None
    for key in subject_to_values.keys():
        if full_matrix is None:
            full_matrix = subject_to_values[key]
            subject_to_covariance[key] = empirical_covariance(subject_to_values[key])
            print(subject_to_means[key])
            print(subject_to_covariance[key])
        else:
            full_matrix = np.append(full_matrix, subject_to_values[key], axis=0)
            subject_to_covariance[key] = empirical_covariance(subject_to_values[key])

    full_mean = full_matrix.mean(axis=0)
    full_covariance = empirical_covariance(full_matrix)
    print(full_mean)
    print(full_covariance)
    return subject_to_covariance, full_covariance, full_mean
Example 39
    def objective_function(self, data, location, covariance):
        """Objective function minimized at each step of the MCD algorithm.
        """
        precision = pinvh(covariance)
        det = fast_logdet(precision)
        trace = np.trace(
            np.dot(empirical_covariance(data - location, assume_centered=True),
                   precision))
        pen = self.shrinkage * np.trace(precision)
        return -det + trace + pen
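A standalone sketch of the trace term above: when `covariance` is the empirical covariance of the centered data itself, tr(S P) equals the number of features, so the check below (assuming scipy and scikit-learn imports) should print True:

import numpy as np
from scipy.linalg import pinvh
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
data = rng.randn(100, 3)
location = data.mean(0)
cov = empirical_covariance(data - location, assume_centered=True)
precision = pinvh(cov)
trace = np.trace(np.dot(cov, precision))
print(np.isclose(trace, 3.0))  # tr(S @ pinv(S)) == rank(S) == n_features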
Example 40
def test_covariance():
    """Tests Covariance module on a simple dataset.

    """
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm="spectral"), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm="frobenius"), 0)
    assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0)
    assert_raises(NotImplementedError, cov.error_norm, emp_cov, norm="foo")
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    print(np.amin(mahal_dist), np.amax(mahal_dist))
    assert np.amin(mahal_dist) > 0

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d), norm="spectral"), 0)

    # test with one sample
    X_1sample = np.arange(5)
    cov = EmpiricalCovariance()
    with warnings.catch_warnings(record=True):
        cov.fit(X_1sample)

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_array_equal(cov.location_, np.zeros(X.shape[1]))
Example 42
    def test_empirical_covariance(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.empirical_covariance()
        expected = covariance.empirical_covariance(iris.data)
        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.index, df.data.columns)
        tm.assert_index_equal(result.columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example 43
    def test_empirical_covariance(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.empirical_covariance()
        expected = covariance.empirical_covariance(iris.data)
        self.assertTrue(isinstance(result, pdml.ModelFrame))
        self.assert_index_equal(result.index, df.data.columns)
        self.assert_index_equal(result.columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example 44
def empirical_covariances(subjects, assume_centered=False, standardize=False):
    """Compute empirical covariances for several signals.

    Parameters
    ----------
    subjects : list of numpy.ndarray, shape for each (n_samples, n_features)
        input subjects. Each subject is a 2D array, whose columns contain
        signals. Sample number can vary from subject to subject, but all
        subjects must have the same number of features (i.e. of columns).

    assume_centered : bool, optional
        if True, assume that all input signals are centered. This slightly
        decreases computation time by avoiding useless computation.

    standardize : bool, optional
        if True, set every signal variance to one before computing their
        covariance matrix (i.e. compute a correlation matrix).

    Returns
    -------
    emp_covs : numpy.ndarray, shape : (feature number, feature number, subject number)
        empirical covariances.

    n_samples : numpy.ndarray, shape: (subject number,)
        number of samples for each subject. dtype is np.float64.
    """
    if not hasattr(subjects, "__iter__"):
        raise ValueError("'subjects' input argument must be an iterable. "
                         "You provided {0}".format(subjects.__class__))

    n_subjects = [s.shape[1] for s in subjects]
    if len(set(n_subjects)) > 1:
        raise ValueError("All subjects must have the same number of "
                         "features.\nYou provided: {0}".format(str(n_subjects))
                         )
    n_subjects = len(subjects)
    n_features = subjects[0].shape[1]

    # Enable to change dtype here because depending on user, conversion from
    # single precision to double will be required or not.
    emp_covs = np.empty((n_features, n_features, n_subjects), order="F")
    for k, s in enumerate(subjects):
        if standardize:
            s = s / s.std(axis=0)  # copy on purpose
        M = empirical_covariance(s, assume_centered=assume_centered)

        # Force matrix symmetry, for numerical stability
        # of _group_sparse_covariance
        emp_covs[..., k] = M + M.T
    emp_covs /= 2

    n_samples = np.asarray([s.shape[0] for s in subjects], dtype=np.float64)

    return emp_covs, n_samples
Example 45
def feat_select(f):

    cp = load(read)
    (X, y, t) = cp.export_data(f)

    data = numpy.c_[X, y]
    cov = empirical_covariance(data, False)

    print(cov)

    for i in range(cov.shape[0] - 1):
        print(cov[i, -1])
Example 46
def test_graph_lasso_2D():
    # Hard-coded solution from Python skggm package
    # obtained by calling `quic(emp_cov, lam=.1, tol=1e-8)`
    cov_skggm = np.array([[3.09550269, 1.186972],
                         [1.186972, 0.57713289]])

    icov_skggm = np.array([[1.52836773, -3.14334831],
                          [-3.14334831,  8.19753385]])
    X = datasets.load_iris().data[:, 2:]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=.1, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_skggm)
        assert_array_almost_equal(icov, icov_skggm)
Example 47
    def train(self, use_entropy=False):
        """ Train the classifier for all the models that it knows. """
        if len(self.dict_categories) < 2:
            print("At least two categories are needed for training...")
            print("Training is skipped.")
            return
        (X, Y, W) = self._get_example_matrix(use_entropy)
        if (hasattr(self.classifier, 'metric') and
                self.classifier.metric == 'mahalanobis'):
            # The mahalanobis distance needs the covariance of the data
            cov = covariance.empirical_covariance(X)
            self.classifier.metric_kwds['V'] = cov
        print("Training with {} categories and {} views.".format(
            len(self.dict_categories), len(Y)))
        print(self.classifier.fit(X, Y))
Example 48
def plot_all(X):
	tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
	#----------------------------------------------------------------------
	# Pre-processing
	print "t-SNE Scaling"
	X_scaled = preprocessing.scale(X) #zero mean, unit variance
	X_tsne_scaled = tsne.fit_transform(X_scaled)

	#normalize the data (scaling individual samples to have unit norm)
	print "t-SNE L2 Norm"
	X_normalized = preprocessing.normalize(X, norm='l2')
	X_tsne_norm = tsne.fit_transform(X_normalized)


	#whiten the data
	print("t-SNE Whitening")
	# the mean computed by the scaler is for the feature dimension.
	# We want the normalization to be in the feature dimension.
	# Zero mean for each sample assumes stationarity, which is not necessarily true for CNN features.
	# X: NxD where N is number of examples and D is number of features.

	# scaler = preprocessing.StandardScaler(with_std=False).fit(X)
	scaler = preprocessing.StandardScaler().fit(X) #this scales each feature to have std-dev 1
	X_centered = scaler.transform(X)

	# U, s, Vh = linalg.svd(X_centered)
	shapeX = X_centered.shape
	IPython.embed()
	# this is DxD matrix where D is the feature dimension
	# still to figure out: It seems computation is not a problem but carrying around a 50kx50k matrix is memory killer!
	sig = (1/shapeX[0]) * np.dot(X_centered.T, X_centered)
	sig2= covariance.empirical_covariance(X_centered, assume_centered=True) #estimated -- this is better.
	sig3, shrinkage= covariance.oas(X_centered, assume_centered=True) #estimated 

	U, s, Vh = linalg.svd(sig, full_matrices=False)
	eps = 1e-2 # this affects how many low- freq eigevalues are eliminated
	invS = np.diag (np.reciprocal(np.sqrt(s+eps)))

	#PCA_whiten
	X_pca = np.dot(invS, np.dot(U.T, X_centered))
	X_tsne_pca = tsne.fit_transform(X_pca)

	#whiten the data (ZCA)
	X_zca = np.dot(U, X_pca)
	X_tsne_zca = tsne.fit_transform(X_zca)

	return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca
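A quick self-contained check of the whitening step above: with a negligible eps, the covariance of the PCA-whitened data should be close to the identity.

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(200, 4).dot(rng.randn(4, 4))  # correlated features
Xc = X - X.mean(axis=0)

sig = (1.0 / Xc.shape[0]) * np.dot(Xc.T, Xc)
U, s, Vh = linalg.svd(sig, full_matrices=False)
invS = np.diag(np.reciprocal(np.sqrt(s + 1e-12)))

X_pca = np.dot(np.dot(Xc, U), invS)  # PCA whitening: NxD in, NxD out
print(np.round(np.cov(X_pca, rowvar=False), 2))  # ~ identity matrix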
def test_graphical_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov, return_costs=True,
                                                 alpha=alpha, mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if alpha != 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
Example n. 50
def _naive_ledoit_wolf_shrinkage(X):
    # A simple implementation of the formulas from Ledoit & Wolf

    # The computation below follows the formulas from
    # "O. Ledoit and M. Wolf, A Well-Conditioned Estimator for
    # Large-Dimensional Covariance Matrices";
    # beta and delta are defined at the beginning of Section 3.2.
    n_samples, n_features = X.shape
    emp_cov = empirical_covariance(X, assume_centered=False)
    mu = np.trace(emp_cov) / n_features
    delta_ = emp_cov.copy()
    delta_.flat[::n_features + 1] -= mu
    delta = (delta_ ** 2).sum() / n_features
    X2 = X ** 2
    beta_ = 1. / (n_features * n_samples) \
        * np.sum(np.dot(X2.T, X2) / n_samples - emp_cov ** 2)

    beta = min(beta_, delta)
    shrinkage = beta / delta
    return shrinkage
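As a sanity check, the naive implementation can be compared with scikit-learn's own ledoit_wolf_shrinkage (a sketch; the data is centered first so that the two conventions coincide):

import numpy as np
from sklearn.covariance import ledoit_wolf_shrinkage

rng = np.random.RandomState(42)
X = rng.randn(50, 10)
X -= X.mean(axis=0)  # center so both estimates use the same convention
assert np.isclose(_naive_ledoit_wolf_shrinkage(X), ledoit_wolf_shrinkage(X))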
def lasso_gsc_comparison():
    """Check that graph lasso and group-sparse covariance give the same
    output for a single task."""
    from sklearn.covariance import graph_lasso, empirical_covariance

    parameters = {'n_tasks': 1, 'n_var': 20, 'density': 0.15,
                  'rho': .2, 'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    _, gsc_precision = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    emp_cov = empirical_covariance(signals[0])
    _, gl_precision = utils.timeit(graph_lasso)(
        emp_cov, parameters['rho'], tol=parameters['tol'],
        max_iter=parameters['max_iter'])

    np.testing.assert_almost_equal(gl_precision, gsc_precision[..., 0],
                                   decimal=4)
def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
    # Import data from .csv
    df = pd.read_csv(filePath, sep=';')
    df.index = df.date
    df = df.drop('date', axis=1)

    # Subset, if requested via subset=True
    if subset:
        df = df.tail(subsetSize)

    # Estimate covariance using Empirical/MLE
    # Expected input is returns, hence set: assume_centered = True
    mleFitted = empirical_covariance(X=df, assume_centered=True)
    sigma = mleFitted

    if shrinkage:
        # Estimate covariance using LedoitWolf, first create instance of object
        lw = LedoitWolf(assume_centered=True)
        lwFitted = lw.fit(X=df).covariance_
        sigma = lwFitted

    return sigma
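Hypothetical usage (the file path and its semicolon-separated, 'date'-indexed layout are assumptions):

sigma_mle = prepareProblem('returns.csv')                    # plain MLE estimate
sigma_lw = prepareProblem('returns.csv', shrinkage=True)     # Ledoit-Wolf shrinkage
sigma_250 = prepareProblem('returns.csv', subset=True, subsetSize=250)  # last 250 rows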
def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289]
        ])
    icov_R = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353]
        ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
Example n. 54
def test_graph_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and scikit-learn do not match in a few places,
    # these values are for the scikit-learn version.
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
        ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
        ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
Example n. 55
    def _nonrobust_covariance(self, data, assume_centered=False):
        """Non-robust estimation of the covariance to be used within MCD.

        Parameters
        ----------
        data: array_like, shape (n_samples, n_features)
          Data for which to compute the non-robust covariance matrix.
        assume_centered: Boolean
          Whether or not the observations should be considered as centered.

        Returns
        -------
        nonrobust_covariance: array_like, shape (n_features, n_features)
          The non-robust covariance of the data.

        """
        if self.cov_computation_method is None:
            cov = empirical_covariance(data, assume_centered=assume_centered)
            cov.flat[::data.shape[1] + 1] += self.shrinkage
        elif self.cov_computation_method == "diag":
            cov = np.diag(np.var(data, 0)) / self.shrinkage
        else:
            raise NotImplementedError
        return cov
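The `cov.flat[::data.shape[1] + 1] += self.shrinkage` idiom above touches exactly the diagonal entries; a tiny demonstration:

import numpy as np

cov = np.zeros((3, 3))
cov.flat[::3 + 1] += 0.5  # flat indices 0, 4, 8: the diagonal
print(cov)
# [[0.5 0.  0. ]
#  [0.  0.5 0. ]
#  [0.  0.  0.5]]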
Example n. 56
    def set_optimal_shrinkage_amount(self, X, verbose=False):
        """

        Parameters
        ----------
        X: array-like, shape = [n_samples, n_features]
          Training data, where n_samples is the number of samples
          and n_features is the number of features.

        Returns
        -------
        optimal_shrinkage: The optimal amount of shrinkage, chosen with a
        10-fold cross-validation. (or a Leave-One Out cross-validation
        if n_samples < 10).

        """
        n_samples, n_features = X.shape
        std_shrinkage = np.trace(empirical_covariance(X)) / \
            float(n_samples * n_features)
        # use L2 here? (was done during research work, changed for consistency)
        rmcd = RMCDl1(shrinkage=std_shrinkage).fit(X)
        cov = GraphLassoCV().fit(X[rmcd.raw_support_])
        self.shrinkage = cov.alpha_
        return cov.cv_alphas_, cov.cv_scores
    def fit(self, X, Y):
        self.predictions = []
        n = len(Y)
        self.MSEs = []
        self.weights = []
        for reg in self.regList:
            self.predictions.append(cross_val_predict(reg, X, Y, cv=self.cv))
            MSE = sum([(p - a) ** 2. for (p, a) in zip(self.predictions[-1], Y)]) / n
            self.MSEs.append(MSE)
            reg.fit(X, Y)

        if self.weighting == 'uniform':
            self.weights = [1. / len(self.regList)] * len(self.regList)
        elif self.weighting == 'score':
            tot = sum([1. / s for s in self.MSEs])
            self.weights = [1. / (s * tot) for s in self.MSEs]
        elif self.weighting == 'varMin':
            self.covariance = empirical_covariance(np.array(self.predictions).T)
            self.weights = smallestVarianceWeights(self.covariance, self.MSEs,
                                                   self.biasWeighting)
        elif self.weighting == 'linearReg':
            self.stacker.fit(np.array(self.predictions).T, Y)
            self.weights = self.stacker.coef_

        print(self.weights)
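smallestVarianceWeights is not shown here; under the usual assumptions (weights summing to one, minimizing the variance of the combined prediction) the solution has the closed form w = C^{-1}1 / (1^T C^{-1} 1), sketched below:

import numpy as np

def min_variance_weights(C):
    # Minimum-variance combination: w = C^-1 1 / (1' C^-1 1)
    ones = np.ones(C.shape[0])
    w = np.linalg.solve(C, ones)
    return w / w.sum()

C = np.array([[1.0, 0.2],
              [0.2, 0.5]])  # covariance of two predictors' errors
print(min_variance_weights(C))  # favors the lower-variance predictor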
Example n. 58
def launch_mcd_on_dataset(n_samples, n_features, n_outliers):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MCD().fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    print(error_location)
    assert(error_location < 1.)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    print(error_cov)
    assert(error_cov < 1.)
Example n. 59
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Example n. 60
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d, assume_centered=True)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)