Code example #1
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
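
Note: this test targets an older scikit-learn API in which assume_centered was passed to fit() and score(); in current releases it is a constructor parameter, as the later test versions on this page show. A minimal sketch of the modern equivalent, with synthetic data standing in for the test fixture X:

import numpy as np
from sklearn.covariance import OAS, oas

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 5))  # stand-in for the test fixture

# assume_centered now lives on the estimator, not on fit()/score()
oa = OAS(assume_centered=True)
oa.fit(X)
print(oa.shrinkage_)

# the functional form mirrors the estimator and returns (covariance, shrinkage)
cov, shrinkage = oas(X, assume_centered=True)
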
Code example #2
File: portfolio.py Project: orangezeit/quant
def estimate(df,
             mean_est='equal_weights',
             cov_est='equal_weights',
             alpha=1e-10):
    """
        Estimate mean and covariance given historical data

        Parameters
        ----------
        df: pd.DataFrame (n.sample, n.feature)
            historical data

        mean_est: str
            method to estimate mean
            selected from {'equal_weights', 'exponential_weights', 'linear-weights'}

        cov_est: str
            method to estimate covariance
            selected from {'equal_weights', 'exponential_weights', 'ledoit_wolf', 'oas'}

        alpha: float, required if exponential_weights selected
            [0, 1], larger alpha means more weights on near
            exponential_weights -> equal_weights if alpha -> 0

        Return
        ------
        mean, cov: np.array
            estimated mean (n.feature) and covariance (n.feature * n.feature)
    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError('Historical data must be a pd.DataFrame.')

    if not isinstance(alpha, float):
        raise TypeError('Parameter alpha must be a float.')

    if mean_est == 'equal_weights':
        mean = df.mean().values
    elif mean_est == 'exponential_weights':
        mean = df.ewm(alpha=alpha).mean().iloc[-1].values
    elif mean_est == 'linear-weights':
        weights = np.arange(1, df.shape[0] + 1)
        mean = df.values.T @ weights / sum(weights)
    else:
        raise ValueError('Method does not exist.')

    if cov_est == 'equal_weights':
        cov = df.cov().values
    elif cov_est == 'exponential_weights':
        cov = df.ewm(alpha=alpha).cov().iloc[-df.shape[1]:].values
    elif cov_est == 'ledoit_wolf':
        cov, _ = ledoit_wolf(df)
    elif cov_est == 'oas':
        cov, _ = oas(df)
    else:
        raise ValueError('Method does not exist.')

    return mean, cov
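
A quick usage sketch for estimate, assuming the function above is in scope; the DataFrame of daily returns is synthetic:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(0.0, 0.01, size=(250, 4)),
                  columns=['A', 'B', 'C', 'D'])  # hypothetical assets

mean, cov = estimate(df, mean_est='equal_weights', cov_est='oas')
print(mean.shape, cov.shape)  # (4,) (4, 4)
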
Code example #3
File: mcca.py Project: DawnSmithaa/mvlearn
def _construct_mcca_gevp(Xs, regs=None, as_lists=False):
    r"""
    Constructs the matrices for the MCCA generalized eigenvector problem
    :math:`LHS v = \lambda RHS v`.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        The list of data matrices

    regs : None | float | 'lw' | 'oas' or list of them, shape (n_views)
        As described in ``mvlearn.mcca.mcca.MCCA``

    as_lists : bool
        If True, returns LHS and RHS as lists of composing blocks instead
        of their composition into full matrices.

    Returns
    -------
    LHS, RHS : numpy.ndarray, (sum_b n_features_b, sum_b n_features_b)
        Left and right hand side matrices for the GEVP
    """
    Xs, n_views, n_samples, n_features = check_Xs(
        Xs, multiview=True, return_dimensions=True
    )
    regs = _check_regs(regs, n_views)

    LHS = [[None for b in range(n_views)] for b in range(n_views)]
    RHS = [None for b in range(n_views)]

    # cross covariance matrices
    for (a, b) in combinations(range(n_views), 2):
        LHS[a][b] = Xs[a].T @ Xs[b]
        LHS[b][a] = LHS[a][b].T

    # view covariance matrices, possibly regularized
    for b in range(n_views):
        if regs[b] is None:
            RHS[b] = Xs[b].T @ Xs[b]
        elif isinstance(regs[b], Number):
            RHS[b] = (1 - regs[b]) * Xs[b].T @ Xs[b] + \
                regs[b] * np.eye(n_features[b])
        elif isinstance(regs[b], str):
            if regs[b] == "lw":
                RHS[b] = ledoit_wolf(Xs[b])[0]
            elif regs[b] == "oas":
                RHS[b] = oas(Xs[b])[0]
            # put back on the scale of X^T X, as opposed to the
            # proper covariance estimate returned by these functions
            RHS[b] *= n_samples

        LHS[b][b] = RHS[b]

    if not as_lists:
        LHS = np.block(LHS)
        RHS = block_diag(*RHS)

    return LHS, RHS
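
The rescaling in the regularized branch is easy to miss: ledoit_wolf and oas return covariance estimates on the X^T X / n scale, so multiplying by n_samples puts the block back on the X^T X scale used by the unregularized blocks. A small sketch of that identity for the empirical case, assuming pre-centered data:

import numpy as np
from sklearn.covariance import oas

rng = np.random.default_rng(0)
n_samples, n_features = 100, 5
X = rng.standard_normal((n_samples, n_features))
X -= X.mean(axis=0)

gram = X.T @ X                             # scale of the unregularized blocks
emp = np.cov(X, rowvar=False, bias=True)   # MLE covariance, divides by n
print(np.allclose(emp * n_samples, gram))  # True: *n_samples recovers X^T X

cov_oas, _ = oas(X, assume_centered=True)
rhs_block = cov_oas * n_samples            # shrunk block on the X^T X scale
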
Code example #4
File: risk_models.py Project: EgorWK4/PyPortfolioOpt
    def oracle_approximating(self):
        """
        Calculate the Oracle Approximating Shrinkage estimate

        :return: shrunk sample covariance matrix
        :rtype: np.ndarray
        """
        X = np.nan_to_num(self.X.values)
        shrunk_cov, self.delta = covariance.oas(X)
        return self.format_and_annualise(shrunk_cov)
Code example #5
File: tsne.py Project: adithyamurali/C3D
def plot_all(X):
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    #----------------------------------------------------------------------
    # Pre-processing
    print "t-SNE Scaling"
    X_scaled = preprocessing.scale(X)  #zero mean, unit variance
    X_tsne_scaled = tsne.fit_transform(X_scaled)

    #normalize the data (scaling individual samples to have unit norm)
    print "t-SNE L2 Norm"
    X_normalized = preprocessing.normalize(X, norm='l2')
    X_tsne_norm = tsne.fit_transform(X_normalized)

    #whiten the data
    print "t-SNE Whitening"
    # the mean computed by the scaler is for the feature dimension.
    # We want the normalization to be in feature dimention.
    # Zero mean for each sample assumes stationarity which is not necessarily true for CNN features.
    # X: NxD where N is number of examples and D is number of features.

    # scaler = preprocessing.StandardScaler(with_std=False).fit(X)
    scaler = preprocessing.StandardScaler().fit(
        X)  #this scales each feature to have std-dev 1
    X_centered = scaler.transform(X)

    # U, s, Vh = linalg.svd(X_centered)
    shapeX = X_centered.shape
    IPython.embed()
    # this is DxD matrix where D is the feature dimension
    # still to figure out: computation is not a problem, but carrying around a 50k x 50k matrix is a memory killer!
    sig = (1 / shapeX[0]) * np.dot(X_centered.T, X_centered)
    sig2 = covariance.empirical_covariance(
        X_centered, assume_centered=True)  #estimated -- this is better.
    sig3, shrinkage = covariance.oas(X_centered,
                                     assume_centered=True)  #estimated

    U, s, Vh = linalg.svd(sig, full_matrices=False)
    eps = 1e-2  # this affects how many low-frequency eigenvalues are eliminated
    invS = np.diag(np.reciprocal(np.sqrt(s + eps)))

    #PCA_whiten
    X_pca = np.dot(invS, np.dot(U.T, X_centered))
    X_tsne_pca = tsne.fit_transform(X_pca)

    #whiten the data (ZCA)
    X_zca = np.dot(U, X_pca)
    X_tsne_zca = tsne.fit_transform(X_zca)

    return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca
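
For reference, a compact PCA/ZCA whitening sketch, assuming X has shape (n_samples, n_features) throughout:

import numpy as np

def whiten(X, eps=1e-2):
    """PCA- and ZCA-whiten the rows of X, shape (n_samples, n_features)."""
    Xc = X - X.mean(axis=0)
    cov = Xc.T @ Xc / Xc.shape[0]
    U, s, _ = np.linalg.svd(cov)   # cov = U diag(s) U.T
    inv_sqrt = np.diag(1.0 / np.sqrt(s + eps))
    X_pca = Xc @ U @ inv_sqrt      # decorrelated, roughly unit variance
    X_zca = X_pca @ U.T            # rotated back toward the original axes
    return X_pca, X_zca
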
Code example #6
    def test_oas(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.oas()
        expected = covariance.oas(iris.data)

        self.assertEqual(len(result), 2)

        self.assertTrue(isinstance(result[0], pdml.ModelFrame))
        self.assert_index_equal(result[0].index, df.data.columns)
        self.assert_index_equal(result[0].columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result[0].values, expected[0])

        self.assert_numpy_array_almost_equal(result[1], expected[1])
Code example #7
File: helper.py Project: sohel10/hsic-lasso-psi
def covariance(H_estimates, m, cov_mode):
    """Covariance estimation for H-vector with different methods"""
    if cov_mode == 'ledoit_wolf':
        cov, _ = ledoit_wolf(H_estimates.T)
    elif cov_mode == 'empirical':
        cov = np.cov(H_estimates)
    elif cov_mode == 'shrink_ss':
        cov, _ = covar.cov_shrink_ss(H_estimates.T)
    elif cov_mode == "shrink_rblw":
        S = np.cov(H_estimates)
        cov, _ = covar.cov_shrink_rblw(S, H_estimates.shape[1])
    else: # default: 'oas'
        cov, _ = oas(H_estimates.T)
    cov = cov / m
    return cov
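
A usage sketch for the helper above (note the function shadows the sklearn.covariance module name, so import accordingly). The default 'oas' path avoids the optional covar dependency; the data here are synthetic:

import numpy as np

rng = np.random.default_rng(0)
H_estimates = rng.standard_normal((10, 200))  # d estimates x n samples

cov_H = covariance(H_estimates, m=200, cov_mode='oas')
print(cov_H.shape)  # (10, 10)
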
Code example #8
def shrinkage(xs):
    '''Estimate covariance using Oracle Approximating shrinkage.

    Parameters
    ----------
    xs : array_like
        N samples of X.

    Returns
    -------
    C : array_like
        Covariance matrix estimation.
    '''
    C, _alpha = oas(xs, assume_centered=True)
    return C
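
Because the wrapper passes assume_centered=True, callers are expected to center the samples themselves. A minimal sketch with synthetic data, assuming the function above is in scope:

import numpy as np

rng = np.random.default_rng(0)
xs = rng.standard_normal((500, 8))
xs -= xs.mean(axis=0)  # pre-center, as assume_centered=True expects

C = shrinkage(xs)
print(C.shape)  # (8, 8)
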
Code example #9
File: test_covariance.py Project: sinhrks/pandas-ml
    def test_oas(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.oas()
        expected = covariance.oas(iris.data)

        self.assertEqual(len(result), 2)

        self.assertIsInstance(result[0], pdml.ModelFrame)
        tm.assert_index_equal(result[0].index, df.data.columns)
        tm.assert_index_equal(result[0].columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result[0].values, expected[0])

        self.assert_numpy_array_almost_equal(result[1], expected[1])
Code example #10
File: covariance.py Project: wozu-dichter/MI_code
def _oas(X):
    """Wrapper for sklearn oas covariance estimator.
    
    Parameters
    ----------
    X : ndarray
        EEG signal, shape (n_channels, n_samples).
    
    Returns
    -------
    C : ndarray
        Estimated covariance, shape (n_channels, n_channels).
    """
    C, _ = oas(X.T)
    return C
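
The transpose is the point of this wrapper: EEG arrays are conventionally (n_channels, n_samples), while sklearn expects (n_samples, n_features). A sketch with simulated data:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((32, 1000))  # 32 channels x 1000 samples

C = _oas(X)
print(C.shape)  # (32, 32): channels x channels
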
Code example #11
def get_covariance_estimator(estimator):
    if callable(estimator):
        f = estimator
    elif isinstance(estimator, str):
        if estimator in ("MCD", "mcd", "MinCovDet", "fast_mcd"):
            f = fast_mcd
        elif estimator in ("Ledoit-Wolf", "LW", "lw"):
            f = lambda x: ledoit_wolf(x)[0]
        elif estimator in ("OAS", "oas"):
            f = lambda x: oas(x)[0]
        else:
            f = empirical_covariance
    else:
        f = empirical_covariance

    return f
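
Usage sketch, assuming the function and its sklearn imports (fast_mcd, ledoit_wolf, oas, empirical_covariance) are in scope:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 6))

f = get_covariance_estimator("oas")
C = f(X)            # (6, 6) shrunk covariance
print(C.shape)

# a custom callable passes straight through
g = get_covariance_estimator(lambda x: np.cov(x, rowvar=False))
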
Code example #12
def optimize(returns, risk_aversion, parameters):
    K, p, iterations = parameters[0], parameters[1], parameters[2]

    # Predict the returns
    posteriori_prob, mu_s, cov_s, predicted_return = expectation_maximization(
        returns, K, iterations, p)

    # UNCOMMENT THIS IF YOU WANT TO INVEST IN TOP nLongs ASSETS WITH HIGHEST PREDICTED RETURNS
    # nLongs = 3
    # idx = (-predicted_return).argsort()[:nLongs]
    # weights = [0] * predicted_return
    # weights[idx] = 1 / nLongs
    # return weights

    cov = risk_aversion * pd.DataFrame(data=cv.oas(returns)[0],
                                       index=returns.columns,
                                       columns=returns.columns).fillna(0)
    problem = osqp.OSQP()
    k = len(predicted_return)
    """
        setup(self, P=None, q=None, A=None, l=None, u=None, **settings):
                Setup OSQP solver problem of the form
                minimize     1/2 x' * P * x + q' * x
                subject to   l <= A * x <= u
        """
    A = np.concatenate((np.ones((1, k)), np.eye(k)), axis=0)
    sA = sparse.csr_matrix(A)
    l = np.hstack([1, np.zeros(k)])
    u = np.ones(k + 1)
    sCov = sparse.csr_matrix(cov)

    problem.setup(sCov, -predicted_return, sA, l, u)

    # Solve problem
    res = problem.solve()
    pr = pd.Series(data=res.x, index=returns.columns)
    return pr
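
A self-contained sketch of the same long-only, fully-invested QP with synthetic inputs; OSQP expects CSC-format sparse matrices for P and A:

import numpy as np
import osqp
from scipy import sparse

k = 4  # number of assets (hypothetical)
rng = np.random.default_rng(0)
R = rng.normal(0.0, 0.01, size=(250, k))        # synthetic returns
P = sparse.csc_matrix(np.cov(R, rowvar=False))  # risk term
q = -R.mean(axis=0)                             # negated expected returns

# weights sum to 1 and lie in [0, 1]
A = sparse.csc_matrix(np.vstack([np.ones((1, k)), np.eye(k)]))
l = np.hstack([1.0, np.zeros(k)])
u = np.ones(k + 1)

prob = osqp.OSQP()
prob.setup(P, q, A, l, u, verbose=False)
w = prob.solve().x
print(w.round(3), w.sum())
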
Code example #13
File: app.py Project: danielrmt/dash-portfolio
def update_covmatrix(logreturns, assets_table, selected_rows, method):
    logreturns = pd.DataFrame(logreturns).set_index('Date')
    df = pd.DataFrame(assets_table)
    assets = df[df.index.isin(selected_rows)][['ticker', 'part', 'mktcap']]
    assets['part'] = assets['part'] / assets['part'].sum()
    assets['wmktcap'] = assets['mktcap'] / assets['mktcap'].sum()

    tickers = assets['ticker'].values
    if method == 'ledoit-wolf':
        covmatrix = LedoitWolf().fit(logreturns.dropna()).covariance_
        covmatrix = pd.DataFrame(covmatrix, index=tickers, columns=tickers)
    elif method == 'oas':
        covmatrix, _ = oas(logreturns.dropna())
        covmatrix = pd.DataFrame(covmatrix, index=tickers, columns=tickers)
    else:
        covmatrix = logreturns.cov()

    m_ibov = r_ibov.resample('MS').sum()
    L = (.06 / 12) / (2. * m_ibov.std()[0]**2)
    assets['rindex'] = 2 * L * covmatrix.values @ assets['part'].values
    assets['rmktcap'] = 2 * L * covmatrix.values @ assets['wmktcap'].values

    return covmatrix.reset_index().to_dict('records'), \
        assets.to_dict('records')
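
The 'oas' branch in isolation, away from the Dash callback; the tickers and returns are synthetic, and oas returns the shrinkage coefficient alongside the matrix:

import numpy as np
import pandas as pd
from sklearn.covariance import oas

rng = np.random.default_rng(0)
logreturns = pd.DataFrame(rng.normal(0.0, 0.01, (250, 3)),
                          columns=['AAA', 'BBB', 'CCC'])  # hypothetical tickers

covmatrix, shrinkage = oas(logreturns.dropna())
covmatrix = pd.DataFrame(covmatrix,
                         index=logreturns.columns,
                         columns=logreturns.columns)
print(shrinkage, covmatrix.shape)
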
Code example #14
def _compute_power_envelopes(subject, kind, freqs):

    ###########################################################################
    # Compute source space
    # -------------------
    src = mne.setup_source_space(subject,
                                 spacing='oct6',
                                 add_dist=False,
                                 subjects_dir=cfg.mne_camcan_freesurfer_path)
    trans = trans_map[subject]
    bem = cfg.mne_camcan_freesurfer_path + \
        "/%s/bem/%s-meg-bem.fif" % (subject, subject)

    ###########################################################################
    # Compute handle MEG data
    # -----------------------

    fname = op.join(cfg.camcan_meg_raw_path, subject, kind,
                    '%s_raw.fif' % kind)

    raw = mne.io.read_raw_fif(fname)
    mne.channels.fix_mag_coil_types(raw.info)
    if DEBUG:
        # raw.crop(0, 180)
        raw.crop(0, 120)
    else:
        raw.crop(0, 300)

    raw = _run_maxfilter(raw, subject, kind)
    _compute_add_ssp_exg(raw)

    # get empty room
    fname_er = op.join(cfg.camcan_meg_path, "emptyroom", subject,
                       "emptyroom_%s.fif" % subject)

    raw_er = mne.io.read_raw_fif(fname_er)
    mne.channels.fix_mag_coil_types(raw_er.info)

    raw_er = _run_maxfilter(raw_er, subject, kind, coord_frame="meg")
    raw_er.info["projs"] += raw.info["projs"]

    cov = mne.compute_raw_covariance(raw_er, method='oas')
    # compute before band-pass of interest

    event_length = 5.
    event_overlap = 0.
    raw_length = raw.times[-1]
    events = mne.make_fixed_length_events(raw,
                                          duration=event_length,
                                          start=0,
                                          stop=raw_length - event_length)

    #######################################################################
    # Compute the forward and inverse
    # -------------------------------

    info = mne.Epochs(raw,
                      events=events,
                      tmin=0,
                      tmax=event_length,
                      baseline=None,
                      reject=None,
                      preload=False,
                      decim=10).info
    fwd = mne.make_forward_solution(info, trans, src, bem)
    inv = make_inverse_operator(info, fwd, cov)
    del fwd

    #######################################################################
    # Compute label time series and do envelope correlation
    # -----------------------------------------------------
    mne_subjects_dir = "/storage/inria/agramfor/MNE-sample-data/subjects"
    labels = mne.read_labels_from_annot('fsaverage',
                                        'aparc_sub',
                                        subjects_dir=mne_subjects_dir)
    labels = mne.morph_labels(labels,
                              subject_from='fsaverage',
                              subject_to=subject,
                              subjects_dir=cfg.mne_camcan_freesurfer_path)
    labels = [ll for ll in labels if 'unknown' not in ll.name]

    results = dict()
    for fmin, fmax, band in freqs:
        print(f"computing {subject}: {fmin} - {fmax} Hz")
        this_raw = raw.copy()
        this_raw.filter(fmin, fmax, n_jobs=1)
        reject = _get_global_reject_epochs(this_raw, decim=5)

        this_raw.apply_hilbert(envelope=False)

        epochs = mne.Epochs(this_raw,
                            events=events,
                            tmin=0,
                            tmax=event_length,
                            baseline=None,
                            reject=reject,
                            preload=True,
                            decim=5)
        if DEBUG:
            epochs = epochs[:3]

        result = {
            'subject': subject,
            'fmin': fmin,
            'fmax': fmax,
            'band': band,
            'label_names': [ll.name for ll in labels]
        }

        stcs = apply_inverse_epochs(epochs,
                                    inv,
                                    lambda2=1. / 9.,
                                    pick_ori='normal',
                                    method='MNE',
                                    return_generator=True)

        label_ts = np.concatenate(mne.extract_label_time_course(
            stcs, labels, inv['src'], mode="pca_flip", return_generator=False),
                                  axis=-1)

        result['cov'], _ = oas(np.abs(label_ts).T, assume_centered=False)

        for orth in ("pairwise", False):
            corr = envelope_correlation(label_ts[np.newaxis],
                                        combine="mean",
                                        orthogonalize=orth)
            result[f"corr{'_orth' if orth else ''}"] = corr[np.triu_indices(
                len(corr))]

        results[band] = result

        if False:  # failsafe mode with intermediate steps written out
            out_fname = op.join(
                cfg.derivative_path,
                f'{subject + ("-debug" if DEBUG else "")}_'
                f'power_envelopes_{band}.h5')

            mne.externals.h5io.write_hdf5(out_fname, result, overwrite=True)
    return results
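
The OAS call near the end operates on the envelope of the label time series, with labels as features; a minimal sketch of that step alone, with random data standing in for label_ts:

import numpy as np
from sklearn.covariance import oas

rng = np.random.default_rng(0)
label_ts = rng.standard_normal((10, 2000))  # n_labels x n_times (stand-in)

# transpose so labels are the features; take the magnitude as above
cov, _ = oas(np.abs(label_ts).T, assume_centered=False)
print(cov.shape)  # (10, 10)
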
Code example #15
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Code example #16
File: ctm.py Project: budgefeeney/sidetopics
def train (dataset, modelState, queryState, trainPlan):
    '''
    Infers the topic distributions in general, and specifically for
    each individual datapoint.
    
    Params:
    dataset - the dataset of words, features and links, of which only words are used in this model
    modelState - the actual CTM model
    queryState - the query results - essentially all the "local" variables
                 matched to the given observations
    trainPlan  - how to execute the training process (e.g. iterations,
                 log-interval etc.)
                 
    Return:
    A new model object with the updated model (note parameters are
    updated in place, so make a defensive copy if you want it)
    A new query object with the update query parameters
    '''
    W   = dataset.words
    D,_ = W.shape
    
    # Unpack the structs, for ease of access and efficiency
    iterations, epsilon, logFrequency, diagonalPriorCov, debug = trainPlan.iterations, trainPlan.epsilon, trainPlan.logFrequency, trainPlan.fastButInaccurate, trainPlan.debug
    means, expMeans, varcs, lxi, s, n = queryState.means, queryState.expMeans, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    K, topicMean, sigT, vocab, vocabPrior, dtype = modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab, modelState.vocabPrior, modelState.dtype
    
    # Book-keeping for logs
    boundIters   = np.zeros(shape=(iterations // logFrequency,))
    boundValues  = np.zeros(shape=(iterations // logFrequency,))
    likelyValues = np.zeros(shape=(iterations // logFrequency,))
    bvIdx = 0
    
    debugFn = _debug_with_bound if debug else _debug_with_nothing
    
    # Initialize some working variables
    isigT = la.inv(sigT)
    R = W.copy()
    
    s.fill(0)
    priorSigt_diag = np.ndarray(shape=(K,), dtype=dtype)
    priorSigt_diag.fill (0.1)
    kappa = K + 2

    expMeans = means.copy()
    
    # Iterate over parameters
    for itr in range(iterations):
        
        # We start with the M-Step, so the parameters are consistent with our
        # initialisation of the RVs when we do the E-Step
        
        # Update the mean and covariance of the prior
#        topicMean = means.mean(axis = 0)
        topicMean = means.sum(axis=0) / (D + kappa) \
                    if USE_NIW_PRIOR \
                    else means.mean(axis=0)
        debugFn (itr, topicMean, "topicMean", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # diff = means - topicMean
        # sigT = diff.T.dot(diff) / D

        sigT, _ = oas(means, assume_centered=False)
        if dtype is not np.float64:
            sigT = sigT.astype(dtype)

        sigT += np.diag(varcs.mean(axis=0))

        if USE_NIW_PRIOR:
            sigT.flat[::K+1] += priorSigt_diag
            sigT += (kappa * D)/(kappa + D) * np.outer(topicMean, topicMean)

        # Building blocks...
        # 1/4 Create the precision matrix from the covariance
        if True or diagonalPriorCov:
            diag = np.diag(sigT)
            sigT = np.diag(diag)
            isigT = np.diag(1. / diag)
        else:
            isigT = la.inv(sigT)
        
        debugFn (itr, sigT, "sigT", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
#        print ("         Det sigT = " + str(la.det(sigT)))
        
        # 2/4 temporarily replace means with exp(means)
        expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
        R = sparseScalarQuotientOfDot(W, expMeans, vocab, out=R)
        # S = expMeans * R.dot(vocab.T)
        
        # 3/4 Update the vocabulary
        vocab *= (R.T.dot(expMeans)).T # Awkward order to maintain sparsity (R is sparse, expMeans is dense)
        vocab += vocabPrior
        vocab = normalizerows_ip(vocab)

        R = sparseScalarQuotientOfDot(W, expMeans, vocab, out=R)
        S = expMeans * R.dot(vocab.T)
        
        # 4/4 Reset the means to their original form, and log effect of vocab update
        #means = np.log(expMeans, out=expMeans)
        debugFn (itr, vocab, "vocab", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        
        # And now this is the E-Step, though it's followed by updates for the
        # parameters also that handle the log-sum-exp approximation.
        
        # Update the Variances
        varcs = np.reciprocal(n[:,np.newaxis] * lxi + isigT.flat[::K+1])
        debugFn (itr, varcs, "varcs", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        
        # Update the Means
        vMat   = (s[:,np.newaxis] * lxi - 0.5) * n[:,np.newaxis] + S
        rhsMat = vMat + isigT.dot(topicMean)
        # for d in range(D):
        #     means[d,:] = la.inv(isigT + ssp.diags(n[d] * lxi[d,:], 0)).dot(rhsMat[d,:])
        means = varcs * rhsMat

        means -= (means[:,0])[:,np.newaxis]
        debugFn (itr, means, "means", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        
        # Update the approximation parameters
        lxi = 2 * negJakkolaOfDerivedXi(means, varcs, s)
        debugFn (itr, lxi, "lxi", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        
        # s can sometimes grow unboundedly; if so, Bouchard's suggested
        # approach is to fix it at zero
        #
        #s = (np.sum(lxi * means, axis=1) + 0.25 * K - 0.5) / np.sum(lxi, axis=1)
        debugFn (itr, s, "s", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        
        if logFrequency > 0 and itr % logFrequency == 0:
            modelState = ModelState(K, topicMean, sigT, vocab, vocabPrior, dtype, MODEL_NAME)
            queryState = QueryState(means, expMeans, varcs, lxi, s, n)
            
            boundValues[bvIdx]  = var_bound(dataset, modelState, queryState)
            likelyValues[bvIdx] = log_likelihood(dataset, modelState, queryState)
            boundIters[bvIdx]   = itr
            perp = perplexity_from_like(likelyValues[bvIdx], n.sum())
            
            print (time.strftime('%X') + " : Iteration %5d: Perplexity %4.2f  Bound %10.2f " % (itr, perp, boundValues[bvIdx]))
            if bvIdx > 0 and  boundValues[bvIdx - 1] > boundValues[bvIdx]:
                printStderr ("ERROR: bound degradation: %f > %f" % (boundValues[bvIdx - 1], boundValues[bvIdx]))
#             print ("Means: min=%f, avg=%f, max=%f\n\n" % (means.min(), means.mean(), means.max()))

            # Check to see if the improvement in the likelihood has fallen below the threshold
            if bvIdx > 1 and boundIters[bvIdx] >= 30:
                lastPerp = perplexity_from_like(likelyValues[bvIdx - 1], n.sum())
                if lastPerp - perp < 1:
                    boundIters, boundValues, likelyValues = clamp (boundIters, boundValues, likelyValues, bvIdx)
                    return modelState, queryState, (boundIters, boundValues, likelyValues)
            bvIdx += 1
            
    
    return \
        ModelState(K, topicMean, sigT, vocab, vocabPrior, dtype, MODEL_NAME), \
        QueryState(means, expMeans, varcs, lxi, s, n), \
        (boundIters, boundValues, likelyValues)
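
The covariance M-step above reduces to two lines: an OAS estimate over the per-document topic means, plus the mean per-topic variational variances on the diagonal. A sketch with synthetic shapes:

import numpy as np
from sklearn.covariance import oas

rng = np.random.default_rng(0)
D, K = 500, 20                       # documents x topics (hypothetical)
means = rng.standard_normal((D, K))
varcs = rng.random((D, K))           # variational variances, positive

sigT, _ = oas(means, assume_centered=False)
sigT += np.diag(varcs.mean(axis=0))  # mirrors the update in the loop above
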
Code example #17
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
Code example #18
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Code example #19
File: test_covariance.py Project: osdf/scikit-learn
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Code example #20
File: test_covariance.py Project: GbalsaC/bitnamiP
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Code example #21
File: covariance.py Project: kingjr/pyRiemann
def _oas(X):
    """Wrapper for sklearn oas covariance estimator"""
    C, _ = oas(X.T)
    return C
Code example #22
C = [C_20, C_40, C_200, C_400]

X_40x20 = np.genfromtxt('tmpmat/X_40x20.csv')
X_20x40 = np.genfromtxt('tmpmat/X_20x40.csv')
X_400x200 = np.genfromtxt('tmpmat/X_400x200.csv')
X_200x400 = np.genfromtxt('tmpmat/X_200x400.csv')

X = [X_40x20, X_20x40, X_400x200, X_200x400]

times = np.zeros(len(p))
res = np.zeros(len(p))
for i in range(len(p)):
    Xi = X[i]
    Ci = C[i]
    start = time()
    C_oas, _ = oas(Xi)
    times[i] = time() - start
    res[i] = np.linalg.norm(C_oas - Ci)

print("OAS results")
print(res)
print("")
print(times)

times = np.zeros(len(p))
res = np.zeros(len(p))
for i in range(len(p)):
    Xi = X[i]
    Ci = C[i]
    start = time()
    C_lw, _ = ledoit_wolf(Xi)
Code example #23
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    warn_msg = (
        "Only one sample available. You may want to reshape your data array")
    with pytest.warns(UserWarning, match=warn_msg):
        oa.fit(X_1sample)

    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)