def test_oas():
    """Exercise the OAS estimator and the ``oas`` function on a small dataset.

    The same sequence of checks is run twice: first with
    ``assume_centered=True`` and then with the default centering.
    """
    # ---- data treated as already centered --------------------------------
    # shrinkage coefficient and score against stored reference values
    centered_est = OAS()
    centered_est.fit(X, assume_centered=True)
    assert_almost_equal(centered_est.shrinkage_, 0.018740, 4)
    assert_almost_equal(
        centered_est.score(X, assume_centered=True), -5.03605, 4)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X, assume_centered=True)
    assert_array_almost_equal(func_cov, centered_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, centered_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=centered_est.shrinkage_)
    shrunk.fit(X, assume_centered=True)
    assert_array_almost_equal(shrunk.covariance_, centered_est.covariance_, 4)

    # single-feature input
    est_1d = OAS()
    est_1d.fit(X_1d, assume_centered=True)
    func_cov, func_shrinkage = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        (X_1d ** 2).sum() / n_samples, est_1d.covariance_, 4)

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False)
    no_prec.fit(X, assume_centered=True)
    assert_almost_equal(no_prec.score(X, assume_centered=True), -5.03605, 4)
    assert no_prec.precision_ is None

    # ---- data centered by the estimator ----------------------------------
    # shrinkage coefficient and score against stored reference values
    default_est = OAS()
    default_est.fit(X)
    assert_almost_equal(default_est.shrinkage_, 0.020236, 4)
    assert_almost_equal(default_est.score(X), 2.079025, 4)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X)
    assert_array_almost_equal(func_cov, default_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, default_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=default_est.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, default_est.covariance_, 4)

    # single-feature input
    est_1d = OAS()
    est_1d.fit(X_1d)
    func_cov, func_shrinkage = oas(X_1d)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), est_1d.covariance_, 4)

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False)
    no_prec.fit(X)
    assert_almost_equal(no_prec.score(X), 2.079025, 4)
    assert no_prec.precision_ is None
def estimate(df, mean_est='equal_weights', cov_est='equal_weights', alpha=1e-10):
    """
    Estimate mean and covariance given historical data

    Parameters
    ----------
    df: pd.DataFrame (n.sample, n.feature)
        historical data
    mean_est: str
        method to estimate mean
        selected from {'equal_weights', 'exponential_weights', 'linear-weights'}
    cov_est: str
        method to estimate covariance
        selected from {'equal_weights', 'exponential_weights', 'ledoit_wolf', 'oas'}
    alpha: float, required if exponential_weights selected
        [0, 1], larger alpha means more weights on near
        exponential_weights -> equal_weights if alpha -> 0

    Return
    ------
    mean, cov: np.array
        estimated mean (n.feature) and covariance (n.feature * n.feature)

    Raises
    ------
    TypeError
        if ``df`` is not a DataFrame or ``alpha`` is not a float
    ValueError
        if ``mean_est`` or ``cov_est`` is not one of the supported names
    """
    # Both arguments are validated up front, whichever method is selected.
    if not isinstance(df, pd.DataFrame):
        raise TypeError('Historical data must be data frame.')
    if not isinstance(alpha, float):
        raise TypeError('Parameter alpha must be float.')

    n_samples, n_features = df.shape

    # ---- mean ----
    if mean_est == 'linear-weights':
        # weights 1, 2, ..., n_samples: the last row weighs the most
        ramp = np.arange(1, n_samples + 1)
        mean = df.values.T @ ramp / ramp.sum()
    elif mean_est == 'exponential_weights':
        # last row of the expanding exponentially weighted mean
        mean = df.ewm(alpha=alpha).mean().iloc[-1].values
    elif mean_est == 'equal_weights':
        mean = df.mean().values
    else:
        raise ValueError('Method does not exist.')

    # ---- covariance ----
    if cov_est == 'oas':
        cov = oas(df)[0]
    elif cov_est == 'ledoit_wolf':
        cov = ledoit_wolf(df)[0]
    elif cov_est == 'exponential_weights':
        # the ewm covariance stacks one block per row; keep the last block
        cov = df.ewm(alpha=alpha).cov().iloc[-n_features:].values
    elif cov_est == 'equal_weights':
        cov = df.cov().values
    else:
        raise ValueError('Method does not exist.')

    return mean, cov
def _construct_mcca_gevp(Xs, regs=None, as_lists=False):
    r"""
    Builds both sides of the MCCA generalized eigenvector problem
    :math:`LHS v = \lambda RHS v`.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        The list of data matrices

    regs : None | float | 'lw' | 'oas' or list of them, shape (n_views)
        As described in ``mvlearn.mcca.mcca.MCCA``

    as_lists : bool
        If True, returns LHS and RHS as lists of composing blocks instead
        of their composition into full matrices.

    Returns
    -------
    LHS, RHS : numpy.ndarray, (sum_b n_features_b, sum_b n_features_b)
        Left and right hand side matrices for the GEVP
    """
    Xs, n_views, n_samples, n_features = check_Xs(
        Xs, multiview=True, return_dimensions=True
    )
    regs = _check_regs(regs, n_views)

    RHS = [None] * n_views
    LHS = [[None] * n_views for _ in range(n_views)]

    # off-diagonal blocks: cross products between every pair of views
    for a, b in combinations(range(n_views), 2):
        cross = Xs[a].T @ Xs[b]
        LHS[a][b] = cross
        LHS[b][a] = cross.T

    # diagonal blocks: per-view products, optionally regularized
    for b in range(n_views):
        X_b = Xs[b]
        reg = regs[b]
        block = None

        if reg is None:
            block = X_b.T @ X_b
        elif isinstance(reg, Number):
            block = (1 - reg) * X_b.T @ X_b + reg * np.eye(n_features[b])
        elif isinstance(reg, str):
            if reg == "lw":
                block = ledoit_wolf(X_b)[0]
            elif reg == "oas":
                block = oas(X_b)[0]

            # the shrinkage estimators return a covariance estimate;
            # rescale it to the X^T X scale of the other branches
            block *= n_samples

        RHS[b] = block
        LHS[b][b] = block

    if as_lists:
        return LHS, RHS

    return np.block(LHS), block_diag(*RHS)
def oracle_approximating(self):
    """
    Compute the Oracle Approximating Shrinkage estimate.

    ``self.X`` is passed through ``np.nan_to_num`` before estimation, and the
    second value returned by ``covariance.oas`` is stored on ``self.delta``.

    :return: shrunk sample covariance matrix
    :rtype: np.ndarray
    """
    cleaned = np.nan_to_num(self.X.values)
    oas_result = covariance.oas(cleaned)
    shrunk_cov, self.delta = oas_result
    return self.format_and_annualise(shrunk_cov)
def plot_all(X):
    """Embed ``X`` in 2-D with t-SNE after four pre-processing schemes.

    Parameters
    ----------
    X : array, shape (N, D)
        N examples (rows) with D features (columns).

    Returns
    -------
    X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca : arrays
        t-SNE embeddings of, respectively, the standardized data, the
        L2-normalized samples, the PCA-whitened data and the ZCA-whitened
        data.
    """
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)

    #----------------------------------------------------------------------
    # Pre-processing
    # print(...) with a single string behaves the same on Python 2 and 3.
    print("t-SNE Scaling")
    X_scaled = preprocessing.scale(X)  # zero mean, unit variance
    X_tsne_scaled = tsne.fit_transform(X_scaled)

    # normalize the data (scaling individual samples to have unit norm)
    print("t-SNE L2 Norm")
    X_normalized = preprocessing.normalize(X, norm='l2')
    X_tsne_norm = tsne.fit_transform(X_normalized)

    # whiten the data
    print("t-SNE Whitening")
    # The scaler statistics are computed per feature (column), which is the
    # direction we want to normalize in. Zero mean for each sample would
    # assume stationarity, which is not necessarily true for CNN features.
    scaler = preprocessing.StandardScaler().fit(X)  # unit std-dev per feature
    X_centered = scaler.transform(X)
    n_samples = X_centered.shape[0]

    # D x D feature covariance. The float() avoids integer division, which
    # made the factor 1 / N evaluate to 0 on Python 2.
    # NOTE: for very large D this matrix dominates memory use.
    sig = np.dot(X_centered.T, X_centered) / float(n_samples)

    U, s, _ = linalg.svd(sig, full_matrices=False)
    eps = 1e-2  # regularizer; damps the directions with small eigenvalues
    invS = np.diag(np.reciprocal(np.sqrt(s + eps)))

    # PCA whitening. Samples are rows, so the rotation and scaling are
    # applied on the right: (N x D) . (D x D) . (D x D).
    X_pca = np.dot(np.dot(X_centered, U), invS)
    X_tsne_pca = tsne.fit_transform(X_pca)

    # ZCA whitening: rotate the PCA-whitened data back to the feature axes.
    X_zca = np.dot(X_pca, U.T)
    X_tsne_zca = tsne.fit_transform(X_zca)

    return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca
def test_oas(self):
    """``ModelFrame.covariance.oas`` must match ``covariance.oas`` on iris."""
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    feature_names = df.data.columns

    expected = covariance.oas(iris.data)
    result = df.covariance.oas()

    # a 2-tuple whose first item is a frame labelled by the feature names
    self.assertEqual(len(result), 2)
    cov_frame = result[0]
    self.assertIsInstance(cov_frame, pdml.ModelFrame)
    self.assert_index_equal(cov_frame.index, feature_names)
    self.assert_index_equal(cov_frame.columns, feature_names)

    # numeric agreement of both returned items
    self.assert_numpy_array_almost_equal(cov_frame.values, expected[0])
    self.assert_numpy_array_almost_equal(result[1], expected[1])
def covariance(H_estimates, m, cov_mode):
    """Covariance estimation for H-vector with different methods.

    ``cov_mode`` selects the estimator: 'ledoit_wolf', 'empirical',
    'shrink_ss' or 'shrink_rblw'; any other value falls back to OAS.
    The estimate is divided by ``m`` before being returned.
    """
    if cov_mode == 'empirical':
        unscaled = np.cov(H_estimates)
    elif cov_mode == 'ledoit_wolf':
        unscaled = ledoit_wolf(H_estimates.T)[0]
    elif cov_mode == 'shrink_ss':
        unscaled = covar.cov_shrink_ss(H_estimates.T)[0]
    elif cov_mode == "shrink_rblw":
        sample_cov = np.cov(H_estimates)
        unscaled = covar.cov_shrink_rblw(sample_cov, H_estimates.shape[1])[0]
    else:
        # default: 'oas'
        unscaled = oas(H_estimates.T)[0]

    return unscaled / m
def shrinkage(xs):
    '''Covariance estimate obtained with Oracle Approximating Shrinkage.

    The estimator is called with ``assume_centered=True``.

    Parameters
    ----------
    xs : array_like
        N samples of X.

    Returns
    -------
    C : array_like
        Covariance matrix estimation.
    '''
    # oas returns the covariance first; the second value is not needed here
    estimate = oas(xs, assume_centered=True)
    return estimate[0]
def test_oas(self):
    """``ModelFrame.covariance.oas`` must match ``covariance.oas`` on iris."""
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    feature_names = df.data.columns

    expected = covariance.oas(iris.data)
    result = df.covariance.oas()

    # a 2-tuple whose first item is a frame labelled by the feature names
    self.assertEqual(len(result), 2)
    cov_frame = result[0]
    self.assertIsInstance(cov_frame, pdml.ModelFrame)
    tm.assert_index_equal(cov_frame.index, feature_names)
    tm.assert_index_equal(cov_frame.columns, feature_names)

    # numeric agreement of both returned items
    self.assert_numpy_array_almost_equal(cov_frame.values, expected[0])
    self.assert_numpy_array_almost_equal(result[1], expected[1])
def _oas(X):
    """Thin wrapper around the sklearn ``oas`` covariance estimator.

    Parameters
    ----------
    X : ndarray
        EEG signal, shape (n_channels, n_samples).

    Returns
    -------
    C : ndarray
        Estimated covariance, shape (n_channels, n_channels).
    """
    # the input is transposed before estimation; only the covariance
    # (first returned value) is kept
    return oas(X.T)[0]
def get_covariance_estimator(estimator):
    """Resolve ``estimator`` to a callable covariance estimator.

    Parameters
    ----------
    estimator : callable or str or any
        A callable is returned unchanged. A string selects an estimator by
        name: "MCD"/"mcd"/"MinCovDet"/"fast_mcd", "Ledoit-Wolf"/"LW"/"lw"
        or "OAS"/"oas". Any other value, including an unrecognized string,
        selects ``empirical_covariance``.

    Returns
    -------
    callable
        Function taking the data as its single argument.
    """
    if callable(estimator):
        return estimator

    # isinstance (not ``type(...) == str``) so that str subclasses are
    # recognized instead of silently falling back to the empirical estimator.
    if isinstance(estimator, str):
        if estimator in ("MCD", "mcd", "MinCovDet", "fast_mcd"):
            # NOTE(review): fast_mcd is returned unwrapped, whereas the
            # branches below keep only the first returned value -- confirm
            # that callers handle fast_mcd's return value.
            return fast_mcd

        if estimator in ("Ledoit-Wolf", "LW", "lw"):
            def _ledoit_wolf_covariance(x):
                return ledoit_wolf(x)[0]
            return _ledoit_wolf_covariance

        if estimator in ("OAS", "oas"):
            def _oas_covariance(x):
                return oas(x)[0]
            return _oas_covariance

    return empirical_covariance
def plot_all(X):
    """Embed ``X`` in 2-D with t-SNE after four pre-processing schemes.

    Parameters
    ----------
    X : array, shape (N, D)
        N examples (rows) with D features (columns).

    Returns
    -------
    X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca : arrays
        t-SNE embeddings of, respectively, the standardized data, the
        L2-normalized samples, the PCA-whitened data and the ZCA-whitened
        data.
    """
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)

    #----------------------------------------------------------------------
    # Pre-processing
    # print(...) with a single string behaves the same on Python 2 and 3.
    print("t-SNE Scaling")
    X_scaled = preprocessing.scale(X)  # zero mean, unit variance
    X_tsne_scaled = tsne.fit_transform(X_scaled)

    # normalize the data (scaling individual samples to have unit norm)
    print("t-SNE L2 Norm")
    X_normalized = preprocessing.normalize(X, norm='l2')
    X_tsne_norm = tsne.fit_transform(X_normalized)

    # whiten the data
    print("t-SNE Whitening")
    # The scaler statistics are computed per feature (column), which is the
    # direction we want to normalize in. Zero mean for each sample would
    # assume stationarity, which is not necessarily true for CNN features.
    scaler = preprocessing.StandardScaler().fit(X)  # unit std-dev per feature
    X_centered = scaler.transform(X)
    n_samples = X_centered.shape[0]

    # D x D feature covariance. The float() avoids integer division, which
    # made the factor 1 / N evaluate to 0 on Python 2.
    # NOTE: for very large D this matrix dominates memory use.
    sig = np.dot(X_centered.T, X_centered) / float(n_samples)

    U, s, _ = linalg.svd(sig, full_matrices=False)
    eps = 1e-2  # regularizer; damps the directions with small eigenvalues
    invS = np.diag(np.reciprocal(np.sqrt(s + eps)))

    # PCA whitening. Samples are rows, so the rotation and scaling are
    # applied on the right: (N x D) . (D x D) . (D x D).
    X_pca = np.dot(np.dot(X_centered, U), invS)
    X_tsne_pca = tsne.fit_transform(X_pca)

    # ZCA whitening: rotate the PCA-whitened data back to the feature axes.
    X_zca = np.dot(X_pca, U.T)
    X_tsne_zca = tsne.fit_transform(X_zca)

    return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca
def optimize(returns, risk_aversion, parameters):
    """Compute portfolio weights by solving a quadratic program with OSQP.

    Parameters
    ----------
    returns : pd.DataFrame
        Asset returns; its columns label the resulting weights.
    risk_aversion : float
        Multiplier applied to the OAS covariance estimate of ``returns``.
    parameters : sequence
        ``(K, p, iterations)``, forwarded to ``expectation_maximization``.

    Returns
    -------
    pd.Series
        Solver solution indexed by ``returns.columns``.
    """
    K, p, iterations = parameters[0], parameters[1], parameters[2]

    # Predict the returns; only the last of the four outputs is used here.
    _, _, _, predicted_return = expectation_maximization(
        returns, K, iterations, p)

    # UNCOMMENT THIS IF YOU WANT TO INVEST IN TOP nLongs ASSETS WITH HIGHEST
    # PREDICTED RETURNS
    # nLongs = 3
    # idx = (-predicted_return).argsort()[:nLongs]
    # weights = [0] * predicted_return
    # weights[idx] = 1 / nLongs
    # return weights

    cov = risk_aversion * pd.DataFrame(data=cv.oas(returns)[0],
                                       index=returns.columns,
                                       columns=returns.columns).fillna(0)

    problem = osqp.OSQP()
    k = len(predicted_return)

    # OSQP solves problems of the form
    #     minimize    1/2 x' * P * x + q' * x
    #     subject to  l <= A * x <= u
    # The first row of A sums the weights (bounded to [1, 1], i.e. == 1);
    # the identity rows bound every individual weight to [0, 1].
    # np.ones replaces pd.np.ones: the pandas.np alias no longer exists in
    # current pandas releases.
    A = np.concatenate((np.ones((1, k)), np.eye(k)), axis=0)
    sA = sparse.csr_matrix(A)
    lower = np.hstack([1, np.zeros(k)])
    upper = np.ones(k + 1)
    sCov = sparse.csr_matrix(cov)

    problem.setup(sCov, -predicted_return, sA, lower, upper)

    # Solve problem
    res = problem.solve()
    return pd.Series(data=res.x, index=returns.columns)
def update_covmatrix(logreturns, assets_table, selected_rows, method):
    """Recompute the covariance matrix and the derived asset columns.

    Parameters
    ----------
    logreturns : records convertible to a DataFrame with a 'Date' column
        Log returns.
    assets_table : records convertible to a DataFrame
        Asset table with at least 'ticker', 'part' and 'mktcap' columns.
    selected_rows : iterable
        Index values of ``assets_table`` rows to keep.
    method : str
        'ledoit-wolf' or 'oas' select the matching shrinkage estimator; any
        other value uses the sample covariance of ``logreturns``.

    Returns
    -------
    (list of dict, list of dict)
        The covariance matrix and the updated asset table, both as records.
    """
    logreturns = pd.DataFrame(logreturns).set_index('Date')

    df = pd.DataFrame(assets_table)
    # Explicit copy: new columns are assigned below, and assigning into the
    # result of chained indexing triggers pandas' SettingWithCopyWarning.
    assets = df.loc[df.index.isin(selected_rows),
                    ['ticker', 'part', 'mktcap']].copy()
    assets['part'] = assets['part'] / assets['part'].sum()
    assets['wmktcap'] = assets['mktcap'] / assets['mktcap'].sum()
    tickers = assets['ticker'].values

    # NOTE(review): the shrinkage branches label the matrix with ``tickers``,
    # which presumes the logreturns columns match the selected assets in the
    # same order -- confirm against the caller.
    if method == 'ledoit-wolf':
        covmatrix = LedoitWolf().fit(logreturns.dropna()).covariance_
        covmatrix = pd.DataFrame(covmatrix, index=tickers, columns=tickers)
    elif method == 'oas':
        covmatrix, _ = oas(logreturns.dropna())
        covmatrix = pd.DataFrame(covmatrix, index=tickers, columns=tickers)
    else:
        covmatrix = logreturns.cov()

    # r_ibov comes from the enclosing module; it is resampled to
    # month-start sums.
    m_ibov = r_ibov.resample('MS').sum()
    L = (.06 / 12) / (2. * m_ibov.std()[0]**2)

    assets['rindex'] = 2 * L * covmatrix.values @ assets['part'].values
    assets['rmktcap'] = 2 * L * covmatrix.values @ assets['wmktcap'].values

    return covmatrix.reset_index().to_dict('records'), \
        assets.to_dict('records')
def _oas(X):
    """Thin wrapper around the sklearn ``oas`` covariance estimator.

    ``X`` is transposed before estimation and only the covariance (the
    first value returned by ``oas``) is kept.
    """
    return oas(X.T)[0]
def _compute_power_envelopes(subject, kind, freqs):
    """Compute per-band label covariances and envelope correlations.

    Parameters
    ----------
    subject : str
        Subject identifier, used to build file paths and to look up the
        head/MRI transform in ``trans_map``.
    kind : str
        Recording kind, used to build the raw file path.
    freqs : iterable of (fmin, fmax, band)
        Frequency bands to process; ``band`` is the key in the output.

    Returns
    -------
    results : dict
        One entry per band. Each entry is a dict with the keys 'subject',
        'fmin', 'fmax', 'band', 'label_names', 'cov' (OAS covariance of the
        absolute label time courses), 'corr_orth' and 'corr' (upper
        triangle of the envelope correlation with and without pairwise
        orthogonalization).
    """
    ###########################################################################
    # Compute source space
    # -------------------
    src = mne.setup_source_space(subject, spacing='oct6', add_dist=False,
                                 subjects_dir=cfg.mne_camcan_freesurfer_path)
    trans = trans_map[subject]
    bem = cfg.mne_camcan_freesurfer_path + \
        "/%s/bem/%s-meg-bem.fif" % (subject, subject)

    ###########################################################################
    # Compute handle MEG data
    # -----------------------
    fname = op.join(cfg.camcan_meg_raw_path, subject, kind,
                    '%s_raw.fif' % kind)

    raw = mne.io.read_raw_fif(fname)
    mne.channels.fix_mag_coil_types(raw.info)
    if DEBUG:
        # raw.crop(0, 180)
        raw.crop(0, 120)
    else:
        raw.crop(0, 300)

    raw = _run_maxfilter(raw, subject, kind)
    _compute_add_ssp_exg(raw)

    # get empty room
    fname_er = op.join(cfg.camcan_meg_path, "emptyroom", subject,
                       "emptyroom_%s.fif" % subject)

    raw_er = mne.io.read_raw_fif(fname_er)
    # Fix the coil types of the empty-room recording itself before it is
    # Maxwell filtered (the subject recording was already fixed above).
    mne.channels.fix_mag_coil_types(raw_er.info)

    raw_er = _run_maxfilter(raw_er, subject, kind, coord_frame="meg")
    # the empty-room data gets the projectors of the subject recording
    raw_er.info["projs"] += raw.info["projs"]

    # noise covariance; computed before the band-pass of interest
    cov = mne.compute_raw_covariance(raw_er, method='oas')

    event_length = 5.
    raw_length = raw.times[-1]
    events = mne.make_fixed_length_events(raw, duration=event_length, start=0,
                                          stop=raw_length - event_length)

    #######################################################################
    # Compute the forward and inverse
    # -------------------------------
    # Epochs are created without preloading, only to obtain the decimated
    # measurement info.
    info = mne.Epochs(raw, events=events, tmin=0, tmax=event_length,
                      baseline=None, reject=None, preload=False,
                      decim=10).info
    fwd = mne.make_forward_solution(info, trans, src, bem)
    inv = make_inverse_operator(info, fwd, cov)
    del fwd

    #######################################################################
    # Compute label time series and do envelope correlation
    # -----------------------------------------------------
    mne_subjects_dir = "/storage/inria/agramfor/MNE-sample-data/subjects"
    labels = mne.read_labels_from_annot('fsaverage', 'aparc_sub',
                                        subjects_dir=mne_subjects_dir)
    labels = mne.morph_labels(labels, subject_from='fsaverage',
                              subject_to=subject,
                              subjects_dir=cfg.mne_camcan_freesurfer_path)
    labels = [ll for ll in labels if 'unknown' not in ll.name]

    results = dict()
    for fmin, fmax, band in freqs:
        print(f"computing {subject}: {fmin} - {fmax} Hz")
        this_raw = raw.copy()
        this_raw.filter(fmin, fmax, n_jobs=1)
        # rejection thresholds are derived before the Hilbert transform
        reject = _get_global_reject_epochs(this_raw, decim=5)

        this_raw.apply_hilbert(envelope=False)

        epochs = mne.Epochs(this_raw, events=events, tmin=0,
                            tmax=event_length, baseline=None, reject=reject,
                            preload=True, decim=5)
        if DEBUG:
            epochs = epochs[:3]

        result = {
            'subject': subject,
            'fmin': fmin,
            'fmax': fmax,
            'band': band,
            'label_names': [ll.name for ll in labels]
        }

        stcs = apply_inverse_epochs(epochs, inv, lambda2=1. / 9.,
                                    pick_ori='normal', method='MNE',
                                    return_generator=True)

        # concatenate the per-epoch label time courses along the last axis
        label_ts = np.concatenate(mne.extract_label_time_course(
            stcs, labels, inv['src'], mode="pca_flip",
            return_generator=False), axis=-1)

        result['cov'], _ = oas(np.abs(label_ts).T, assume_centered=False)

        for orth in ("pairwise", False):
            corr = envelope_correlation(label_ts[np.newaxis], combine="mean",
                                        orthogonalize=orth)
            # stored under 'corr_orth' or 'corr'; upper triangle only
            result[f"corr{'_orth' if orth else ''}"] = corr[np.triu_indices(
                len(corr))]

        results[band] = result

        if False:  # failsafe mode with intermediate steps written out
            out_fname = op.join(
                cfg.derivative_path,
                f'{subject + ("-debug" if DEBUG else "")}_'
                f'power_envelopes_{band}.h5')

            mne.externals.h5io.write_hdf5(out_fname, result, overwrite=True)
    return results
def test_oas():
    """Check the OAS estimator and the ``oas`` function on a small dataset.

    The checks are run on explicitly centered data with
    ``assume_centered=True`` and then on the raw data with the default
    centering; both must give the same shrinkage and score.
    """
    # ---- explicitly centered data ----------------------------------------
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)

    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)

    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert oa.precision_ is None

    # ---- same tests without assuming centered data -----------------------
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)

    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)

    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample: fitting must warn and yield an all-zero
    # covariance. The sample is given as an explicit (1, n_features) array
    # rather than a 1-D array, so it cannot be read as 5 one-feature samples.
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert oa.precision_ is None
def train (dataset, modelState, queryState, trainPlan):
    '''
    Infers the topic distributions in general, and specifically for
    each individual datapoint.

    Params:
    dataset    - the dataset of words, features and links of which only words
                 are used in this model
    modelState - the actual CTM model
    queryState - the query results - essentially all the "local" variables
                 matched to the given observations
    trainPlan  - how to execute the training process (e.g. iterations,
                 log-interval etc.)

    Return:
    A new model object with the updated model (note parameters are
    updated in place, so make a defensive copy if you want it)
    A new query object with the update query parameters
    A tuple (boundIters, boundValues, likelyValues) holding, for every logged
    iteration, its index, the variational bound and the log likelihood
    '''
    W = dataset.words
    D,_ = W.shape

    # Unpack the the structs, for ease of access and efficiency
    iterations, epsilon, logFrequency, diagonalPriorCov, debug = trainPlan.iterations, trainPlan.epsilon, trainPlan.logFrequency, trainPlan.fastButInaccurate, trainPlan.debug
    means, expMeans, varcs, lxi, s, n = queryState.means, queryState.expMeans, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    K, topicMean, sigT, vocab, vocabPrior, dtype = modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab, modelState.vocabPrior, modelState.dtype

    # Book-keeping for logs
    # Iterations 0, logFrequency, 2 * logFrequency, ... are logged, which is
    # ceil(iterations / logFrequency) entries. Floor division would leave the
    # arrays one slot short whenever iterations is not a multiple of
    # logFrequency, and would divide by zero when logging is disabled
    # (logFrequency == 0).
    numLogs = (iterations + logFrequency - 1) // logFrequency if logFrequency > 0 else 0
    boundIters   = np.zeros(shape=(numLogs,))
    boundValues  = np.zeros(shape=(numLogs,))
    likelyValues = np.zeros(shape=(numLogs,))
    bvIdx = 0

    debugFn = _debug_with_bound if debug else _debug_with_nothing

    # Initialize some working variables
    isigT = la.inv(sigT)
    R = W.copy()

    s.fill(0)

    priorSigt_diag = np.ndarray(shape=(K,), dtype=dtype)
    priorSigt_diag.fill (0.1)
    kappa = K + 2

    expMeans = means.copy()

    # Iterate over parameters
    for itr in range(iterations):

        # We start with the M-Step, so the parameters are consistent with our
        # initialisation of the RVs when we do the E-Step

        # Update the mean and covariance of the prior
        # topicMean = means.mean(axis = 0)
        topicMean = means.sum(axis=0) / (D + kappa) \
                    if USE_NIW_PRIOR \
                    else means.mean(axis=0)
        debugFn (itr, topicMean, "topicMean", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # diff = means - topicMean
        # sigT = diff.T.dot(diff) / D
        # The prior covariance is the OAS shrinkage estimate of the means,
        # plus the average of the per-document variances on the diagonal.
        sigT, _ = oas(means, assume_centered=False)
        if dtype is not np.float64:
            sigT = sigT.astype(dtype)
        sigT += np.diag(varcs.mean(axis=0))

        if USE_NIW_PRIOR:
            sigT.flat[::K+1] += priorSigt_diag
            sigT += (kappa * D)/(kappa + D) * np.outer(topicMean, topicMean)

        # Building blocks...
        # 1/4 Create the precision matrix from the covariance
        # NOTE: the "True or" makes the diagonal branch unconditional, so
        # diagonalPriorCov currently has no effect.
        if True or diagonalPriorCov:
            diag = np.diag(sigT)
            sigT = np.diag(diag)
            isigT = np.diag(1. / diag)
        else:
            isigT = la.inv(sigT)

        debugFn (itr, sigT, "sigT", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)
        # print ("                Det sigT = " + str(la.det(sigT)))

        # 2/4 temporarily replace means with exp(means)
        # (the row-wise maximum is subtracted before exponentiating)
        expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
        R = sparseScalarQuotientOfDot(W, expMeans, vocab, out=R)
        # S = expMeans * R.dot(vocab.T)

        # 3/4 Update the vocabulary
        vocab *= (R.T.dot(expMeans)).T # Awkward order to maintain sparsity (R is sparse, expMeans is dense)
        vocab += vocabPrior
        vocab = normalizerows_ip(vocab)

        R = sparseScalarQuotientOfDot(W, expMeans, vocab, out=R)
        S = expMeans * R.dot(vocab.T)

        # 4/4 Reset the means to their original form, and log effect of vocab update
        #means = np.log(expMeans, out=expMeans)
        debugFn (itr, vocab, "vocab", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # And now this is the E-Step, though it's followed by updates for the
        # parameters also that handle the log-sum-exp approximation.

        # Update the Variances
        varcs = np.reciprocal(n[:,np.newaxis] * lxi + isigT.flat[::K+1])
        debugFn (itr, varcs, "varcs", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # Update the Means
        vMat   = (s[:,np.newaxis] * lxi - 0.5) * n[:,np.newaxis] + S
        rhsMat = vMat + isigT.dot(topicMean)
        # for d in range(D):
        #     means[d,:] = la.inv(isigT + ssp.diags(n[d] * lxi[d,:], 0)).dot(rhsMat[d,:])
        means = varcs * rhsMat
        # shift every row so that its first component is zero
        means -= (means[:,0])[:,np.newaxis]

        debugFn (itr, means, "means", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # Update the approximation parameters
        lxi = 2 * negJakkolaOfDerivedXi(means, varcs, s)
        debugFn (itr, lxi, "lxi", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        # s can sometimes grow unboundedly
        # If so Bouchard's suggested approach of fixing it at zero
        #
        #s = (np.sum(lxi * means, axis=1) + 0.25 * K - 0.5) / np.sum(lxi, axis=1)
        debugFn (itr, s, "s", W, K, topicMean, sigT, vocab, vocabPrior, dtype, means, varcs, lxi, s, n)

        if logFrequency > 0 and itr % logFrequency == 0:
            modelState = ModelState(K, topicMean, sigT, vocab, vocabPrior, dtype, MODEL_NAME)
            queryState = QueryState(means, expMeans, varcs, lxi, s, n)

            boundValues[bvIdx]  = var_bound(dataset, modelState, queryState)
            likelyValues[bvIdx] = log_likelihood(dataset, modelState, queryState)
            boundIters[bvIdx]   = itr
            perp = perplexity_from_like(likelyValues[bvIdx], n.sum())

            print (time.strftime('%X') + " : Iteration %5d: Perplexity %4.2f  Bound %10.2f " % (itr, perp, boundValues[bvIdx]))
            if bvIdx > 0 and boundValues[bvIdx - 1] > boundValues[bvIdx]:
                printStderr ("ERROR: bound degradation: %f > %f" % (boundValues[bvIdx - 1], boundValues[bvIdx]))
            # print ("Means: min=%f, avg=%f, max=%f\n\n" % (means.min(), means.mean(), means.max()))

            # Check to see if the improvment in the likelihood has fallen below the threshold
            if bvIdx > 1 and boundIters[bvIdx] >= 30:
                lastPerp = perplexity_from_like(likelyValues[bvIdx - 1], n.sum())
                if lastPerp - perp < 1:
                    # converged: trim the logs to the entries written so far
                    boundIters, boundValues, likelyValues = clamp (boundIters, boundValues, likelyValues, bvIdx)
                    return modelState, queryState, (boundIters, boundValues, likelyValues)
            bvIdx += 1

    return \
        ModelState(K, topicMean, sigT, vocab, vocabPrior, dtype, MODEL_NAME), \
        QueryState(means, expMeans, varcs, lxi, s, n), \
        (boundIters, boundValues, likelyValues)
def test_oas():
    """Exercise the OAS estimator and the ``oas`` function on a small dataset.

    The same sequence of checks is run twice: first with
    ``assume_centered=True`` and then with the default centering.
    """
    first_feature = X[:, 0].reshape((-1, 1))

    # ---- data treated as already centered --------------------------------
    # shrinkage coefficient and score against stored reference values
    centered_est = OAS()
    centered_est.fit(X, assume_centered=True)
    assert_almost_equal(centered_est.shrinkage_, 0.018740, 4)
    assert_almost_equal(
        centered_est.score(X, assume_centered=True), -5.03605, 4)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X, assume_centered=True)
    assert_array_almost_equal(func_cov, centered_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, centered_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=centered_est.shrinkage_)
    shrunk.fit(X, assume_centered=True)
    assert_array_almost_equal(shrunk.covariance_, centered_est.covariance_, 4)

    # single-feature input
    X_1d = first_feature
    est_1d = OAS()
    est_1d.fit(X_1d, assume_centered=True)
    func_cov, func_shrinkage = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        (X_1d ** 2).sum() / n_samples, est_1d.covariance_, 4)

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False)
    no_prec.fit(X, assume_centered=True)
    assert_almost_equal(no_prec.score(X, assume_centered=True), -5.03605, 4)
    assert no_prec.precision_ is None

    # ---- data centered by the estimator ----------------------------------
    # shrinkage coefficient and score against stored reference values
    default_est = OAS()
    default_est.fit(X)
    assert_almost_equal(default_est.shrinkage_, 0.020236, 4)
    assert_almost_equal(default_est.score(X), 2.079025, 4)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X)
    assert_array_almost_equal(func_cov, default_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, default_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=default_est.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, default_est.covariance_, 4)

    # single-feature input
    est_1d = OAS()
    est_1d.fit(X_1d)
    func_cov, func_shrinkage = oas(X_1d)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), est_1d.covariance_, 4)

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False)
    no_prec.fit(X)
    assert_almost_equal(no_prec.score(X), 2.079025, 4)
    assert no_prec.precision_ is None
def test_oas():
    """Check the OAS estimator and the ``oas`` function on a small dataset.

    The checks are run on explicitly centered data with
    ``assume_centered=True`` and then on the raw data with the default
    centering; both must give the same shrinkage and score.
    """
    # ---- explicitly centered data ----------------------------------------
    X_centered = X - X.mean(axis=0)
    centered_est = OAS(assume_centered=True)
    centered_est.fit(X_centered)
    # reference values reused by the uncentered checks below
    shrinkage_ = centered_est.shrinkage_
    score_ = centered_est.score(X_centered)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X_centered, assume_centered=True)
    assert_array_almost_equal(func_cov, centered_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, centered_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=centered_est.shrinkage_,
                              assume_centered=True)
    shrunk.fit(X_centered)
    assert_array_almost_equal(shrunk.covariance_, centered_est.covariance_, 4)

    # single-feature input
    X_1d = X[:, 0:1]
    est_1d = OAS(assume_centered=True)
    est_1d.fit(X_1d)
    func_cov, func_shrinkage = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        (X_1d ** 2).sum() / n_samples, est_1d.covariance_, 4)

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False, assume_centered=True)
    no_prec.fit(X_centered)
    assert_almost_equal(no_prec.score(X_centered), score_, 4)
    assert no_prec.precision_ is None

    # ---- same tests without assuming centered data -----------------------
    default_est = OAS()
    default_est.fit(X)
    assert_almost_equal(default_est.shrinkage_, shrinkage_, 4)
    assert_almost_equal(default_est.score(X), score_, 4)

    # the function and the estimator must agree
    func_cov, func_shrinkage = oas(X)
    assert_array_almost_equal(func_cov, default_est.covariance_, 4)
    assert_almost_equal(func_shrinkage, default_est.shrinkage_)

    # ShrunkCovariance fed with the OAS shrinkage must give the same matrix
    shrunk = ShrunkCovariance(shrinkage=default_est.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, default_est.covariance_, 4)

    # single-feature input
    X_1d = X[:, 0].reshape((-1, 1))
    est_1d = OAS()
    est_1d.fit(X_1d)
    func_cov, func_shrinkage = oas(X_1d)
    assert_array_almost_equal(func_cov, est_1d.covariance_, 4)
    assert_almost_equal(func_shrinkage, est_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), est_1d.covariance_, 4)

    # a single sample: fitting must warn and yield an all-zero covariance
    X_1sample = np.arange(5).reshape(1, 5)
    single_est = OAS()
    assert_warns(UserWarning, single_est.fit, X_1sample)
    assert_array_almost_equal(single_est.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # same score when the precision matrix is not stored
    no_prec = OAS(store_precision=False)
    no_prec.fit(X)
    assert_almost_equal(no_prec.score(X), score_, 4)
    assert no_prec.precision_ is None
def test_oas():
    """Cross-check OAS, oas() and ShrunkCovariance on the data set X.

    The checks are run twice: on hand-centered data with
    ``assume_centered=True``, then on the raw data with default settings.
    """
    # ---- Part 1: data centered by hand, assume_centered=True -------------
    X_centered = X - X.mean(axis=0)
    centered_oas = OAS(assume_centered=True)
    centered_oas.fit(X_centered)
    # Reference values: every later fit on the full data set is compared
    # against these two numbers.
    shrinkage_ = centered_oas.shrinkage_
    score_ = centered_oas.score(X_centered)

    # the function and the estimator must agree on covariance and shrinkage
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(
        X_centered, assume_centered=True
    )
    assert_array_almost_equal(oa_cov_from_mle, centered_oas.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, centered_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(
        shrinkage=centered_oas.shrinkage_, assume_centered=True
    )
    shrunk.fit(X_centered)
    assert_array_almost_equal(shrunk.covariance_, centered_oas.covariance_, 4)

    # single feature: first column of X as a column vector
    X_1d = X[:, 0].reshape((-1, 1))
    oas_1d = OAS(assume_centered=True)
    oas_1d.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oas_1d.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oas_1d.shrinkage_)
    # the fitted covariance is compared with the sum of squares / n_samples
    second_moment = (X_1d ** 2).sum() / n_samples
    assert_array_almost_equal(second_moment, oas_1d.covariance_, 4)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False, assume_centered=True)
    no_precision.fit(X_centered)
    assert_almost_equal(no_precision.score(X_centered), score_, 4)
    assert no_precision.precision_ is None

    # ---- Part 2: same checks, raw data and default assume_centered -------
    raw_oas = OAS()
    raw_oas.fit(X)
    assert_almost_equal(raw_oas.shrinkage_, shrinkage_, 4)
    assert_almost_equal(raw_oas.score(X), score_, 4)

    # the function and the estimator must agree on covariance and shrinkage
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, raw_oas.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, raw_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(shrinkage=raw_oas.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, raw_oas.covariance_, 4)

    # single feature: compared with the empirical covariance
    X_1d = X[:, 0].reshape((-1, 1))
    oas_1d = OAS()
    oas_1d.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oas_1d.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oas_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), oas_1d.covariance_, 4
    )

    # one sample, passed as a 1-D array: the fit only has to complete.
    # Warnings raised during the fit are captured and discarded; nothing is
    # asserted about them or about the fitted covariance.
    X_1sample = np.arange(5)
    one_sample_oas = OAS()
    with warnings.catch_warnings(record=True):
        one_sample_oas.fit(X_1sample)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False)
    no_precision.fit(X)
    assert_almost_equal(no_precision.score(X), score_, 4)
    assert no_precision.precision_ is None
def test_oas():
    """Cross-check OAS, oas() and ShrunkCovariance on the data set X.

    The checks are run twice: on hand-centered data with
    ``assume_centered=True``, then on the raw data with default settings.
    """
    # ---- Part 1: data centered by hand, assume_centered=True -------------
    X_centered = X - X.mean(axis=0)
    centered_oas = OAS(assume_centered=True)
    centered_oas.fit(X_centered)
    # Reference values: every later fit on the full data set is compared
    # against these two numbers.
    shrinkage_ = centered_oas.shrinkage_
    score_ = centered_oas.score(X_centered)

    # the function and the estimator must agree on covariance and shrinkage
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(
        X_centered, assume_centered=True
    )
    assert_array_almost_equal(oa_cov_from_mle, centered_oas.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, centered_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(
        shrinkage=centered_oas.shrinkage_, assume_centered=True
    )
    shrunk.fit(X_centered)
    assert_array_almost_equal(shrunk.covariance_, centered_oas.covariance_, 4)

    # single feature: first column of X as a column vector
    X_1d = X[:, 0].reshape((-1, 1))
    oas_1d = OAS(assume_centered=True)
    oas_1d.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oas_1d.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oas_1d.shrinkage_)
    # the fitted covariance is compared with the sum of squares / n_samples
    second_moment = (X_1d ** 2).sum() / n_samples
    assert_array_almost_equal(second_moment, oas_1d.covariance_, 4)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False, assume_centered=True)
    no_precision.fit(X_centered)
    assert_almost_equal(no_precision.score(X_centered), score_, 4)
    assert no_precision.precision_ is None

    # ---- Part 2: same checks, raw data and default assume_centered -------
    raw_oas = OAS()
    raw_oas.fit(X)
    assert_almost_equal(raw_oas.shrinkage_, shrinkage_, 4)
    assert_almost_equal(raw_oas.score(X), score_, 4)

    # the function and the estimator must agree on covariance and shrinkage
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, raw_oas.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, raw_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(shrinkage=raw_oas.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, raw_oas.covariance_, 4)

    # single feature: compared with the empirical covariance
    X_1d = X[:, 0].reshape((-1, 1))
    oas_1d = OAS()
    oas_1d.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oas_1d.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oas_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), oas_1d.covariance_, 4
    )

    # one sample, passed as a 1-D array: the fit only has to complete.
    # Warnings raised during the fit are captured and discarded; nothing is
    # asserted about them or about the fitted covariance.
    X_1sample = np.arange(5)
    one_sample_oas = OAS()
    with warnings.catch_warnings(record=True):
        one_sample_oas.fit(X_1sample)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False)
    no_precision.fit(X)
    assert_almost_equal(no_precision.score(X), score_, 4)
    assert no_precision.precision_ is None
# Reference matrices and the data sets they are compared against; C[i] is
# paired with X[i] in the loops below.
C = [C_20, C_40, C_200, C_400]

# Data sets read from text files under tmpmat/.
X_40x20 = np.genfromtxt('tmpmat/X_40x20.csv')
X_20x40 = np.genfromtxt('tmpmat/X_20x40.csv')
X_400x200 = np.genfromtxt('tmpmat/X_400x200.csv')
X_200x400 = np.genfromtxt('tmpmat/X_200x400.csv')
X = [X_40x20, X_20x40, X_400x200, X_200x400]

# --- OAS: wall-clock time of each call and norm of the difference to C[i] ---
times = np.zeros(len(p))
res = np.zeros(len(p))
for i in range(len(p)):
    Xi, Ci = X[i], C[i]
    start = time()
    C_oas, _ = oas(Xi)
    times[i] = time() - start
    res[i] = np.linalg.norm(C_oas - Ci)

# Header, errors, blank line, timings -- one item per output line.
print("OAS results", res, "", times, sep="\n")

# --- Ledoit-Wolf: same loop, accumulators reset first ---
times = np.zeros(len(p))
res = np.zeros(len(p))
for i in range(len(p)):
    Xi, Ci = X[i], C[i]
    start = time()
    # NOTE(review): the visible fragment stops after this call; the timing
    # and error bookkeeping presumably follows -- confirm against the script.
    C_lw, _ = ledoit_wolf(Xi)
def test_oas():
    # Cross-checks the OAS estimator, the oas() function and ShrunkCovariance
    # on the module-level data set X, first on hand-centered data and then on
    # the raw data.

    # ---- Part 1: data centered by hand, assume_centered=True -------------
    X_centered = X - X.mean(axis=0)
    centered_oas = OAS(assume_centered=True)
    centered_oas.fit(X_centered)
    # Reference values: every later fit on the full data set is compared
    # against these two numbers.
    shrinkage_ = centered_oas.shrinkage_
    score_ = centered_oas.score(X_centered)

    # the function and the estimator must agree on covariance and shrinkage
    cov_from_func, shrinkage_from_func = oas(X_centered, assume_centered=True)
    assert_array_almost_equal(cov_from_func, centered_oas.covariance_, 4)
    assert_almost_equal(shrinkage_from_func, centered_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(
        shrinkage=centered_oas.shrinkage_, assume_centered=True
    )
    shrunk.fit(X_centered)
    assert_array_almost_equal(shrunk.covariance_, centered_oas.covariance_, 4)

    # single feature: first column of X, kept two-dimensional
    X_1d = X[:, 0:1]
    oas_1d = OAS(assume_centered=True)
    oas_1d.fit(X_1d)
    cov_from_func, shrinkage_from_func = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(cov_from_func, oas_1d.covariance_, 4)
    assert_almost_equal(shrinkage_from_func, oas_1d.shrinkage_)
    # the fitted covariance is compared with the sum of squares / n_samples
    second_moment = (X_1d ** 2).sum() / n_samples
    assert_array_almost_equal(second_moment, oas_1d.covariance_, 4)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False, assume_centered=True)
    no_precision.fit(X_centered)
    assert_almost_equal(no_precision.score(X_centered), score_, 4)
    assert no_precision.precision_ is None

    # ---- Part 2: same checks, raw data and default assume_centered -------
    raw_oas = OAS()
    raw_oas.fit(X)
    assert_almost_equal(raw_oas.shrinkage_, shrinkage_, 4)
    assert_almost_equal(raw_oas.score(X), score_, 4)

    # the function and the estimator must agree on covariance and shrinkage
    cov_from_func, shrinkage_from_func = oas(X)
    assert_array_almost_equal(cov_from_func, raw_oas.covariance_, 4)
    assert_almost_equal(shrinkage_from_func, raw_oas.shrinkage_)

    # ShrunkCovariance given the OAS shrinkage must give the same covariance
    shrunk = ShrunkCovariance(shrinkage=raw_oas.shrinkage_)
    shrunk.fit(X)
    assert_array_almost_equal(shrunk.covariance_, raw_oas.covariance_, 4)

    # single feature: compared with the empirical covariance
    X_1d = X[:, 0].reshape((-1, 1))
    oas_1d = OAS()
    oas_1d.fit(X_1d)
    cov_from_func, shrinkage_from_func = oas(X_1d)
    assert_array_almost_equal(cov_from_func, oas_1d.covariance_, 4)
    assert_almost_equal(shrinkage_from_func, oas_1d.shrinkage_)
    assert_array_almost_equal(
        empirical_covariance(X_1d), oas_1d.covariance_, 4
    )

    # single sample: fitting must emit a UserWarning whose text matches
    # warn_msg, and the resulting covariance is expected to be the 5 x 5
    # zero matrix
    X_1sample = np.arange(5).reshape(1, 5)
    one_sample_oas = OAS()
    warn_msg = (
        "Only one sample available. You may want to reshape your data array"
    )
    with pytest.warns(UserWarning, match=warn_msg):
        one_sample_oas.fit(X_1sample)
    expected_cov = np.zeros(shape=(5, 5), dtype=np.float64)
    assert_array_almost_equal(one_sample_oas.covariance_, expected_cov)

    # store_precision=False: same score, and precision_ is left as None
    no_precision = OAS(store_precision=False)
    no_precision.fit(X)
    assert_almost_equal(no_precision.score(X), score_, 4)
    assert no_precision.precision_ is None