def test_graph_lasso(random_state=0):
    """Check ``graph_lasso`` with both solvers and smoke test ``GraphLasso``.

    Data are drawn from a multivariate normal whose precision matrix is
    sparse.  For every regularisation strength, the 'cd' and 'lars' modes
    are both run on the empirical covariance, the recorded costs are
    required to decrease at every iteration, and the two covariance
    estimates are required to agree.  Finally the ``GraphLasso`` estimator
    is fitted and compared with the functional 'cd' result obtained for the
    same ``alpha``.

    Parameters
    ----------
    random_state : seed or random state accepted by ``check_random_state``
        Controls both the sparse precision matrix and the sampled data.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    # alpha used by the estimator smoke test below; the matching functional
    # result is remembered while looping so both sides use the same penalty.
    estimator_alpha = .1
    reference_cov = None

    for alpha in (.1, .01):
        covs = dict()
        for method in ('cd', 'lars'):
            # Forward the loop variables: previously alpha=.1 and the
            # default mode were always used, so nothing was compared.
            cov_, _, costs = graph_lasso(emp_cov, alpha=alpha, mode=method,
                                         return_costs=True)
            covs[method] = cov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease
            assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results.  Two distinct
        # iterative solvers only agree up to their stopping tolerance,
        # hence the explicit (looser) number of decimals.
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        if alpha == estimator_alpha:
            reference_cov = covs['cd']

    # Smoke test the estimator
    model = GraphLasso(alpha=estimator_alpha).fit(X)
    assert_array_almost_equal(model.covariance_, reference_cov)
def mk_spd(dim, alpha, maxc, minc, rs):
    """Build one sparse SPD matrix and return it as nested lists.

    Parameters
    ----------
    dim, alpha : forwarded unchanged to ``make_sparse_spd_matrix``.
    maxc : forwarded as ``largest_coef``.
    minc : forwarded as ``smallest_coef``.
    rs : forwarded as ``random_state``.

    Returns
    -------
    list
        A one-element list whose single item is the generated matrix
        converted with ``tolist()``.
    """
    matrix = make_sparse_spd_matrix(
        dim=dim,
        alpha=alpha,
        largest_coef=maxc,
        smallest_coef=minc,
        random_state=rs,
    )
    return [matrix.tolist()]
def test_graph_lasso_cv(random_state=1):
    """Smoke test ``GraphLassoCV`` in verbose mode on a tiny sample.

    The data come from a multivariate normal with a sparse precision
    matrix.  Standard output is replaced by an in-memory buffer while the
    estimator is fitted and is always restored afterwards.
    """
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)

    # Draw observations from a sparse multivariate normal
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_obs)

    # Redirect stdout so the verbose output is exercised but not shown
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        # Verbosity must be very high for Parallel to print on stdout
        GraphLassoCV(verbose=100, alphas=3).fit(X)
    finally:
        sys.stdout = saved_stdout
def test_deprecated_grid_scores(random_state=1):
    """Check that reading ``grid_scores`` warns and mirrors ``grid_scores_``.

    A ``GraphicalLassoCV`` is fitted on a small sample drawn from a sparse
    multivariate normal; accessing the ``grid_scores`` attribute must then
    raise a ``DeprecationWarning`` matching the expected message and give
    the same value as ``grid_scores_``.
    """
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_obs)

    estimator = GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    estimator.fit(X)

    # Passed to pytest.warns as the ``match`` pattern
    expected_warning = ("Attribute grid_scores was deprecated in version "
                        "0.19 and will be removed in 0.21. Use "
                        "``grid_scores_`` instead")
    with pytest.warns(DeprecationWarning, match=expected_warning):
        assert_equal(estimator.grid_scores, estimator.grid_scores_)
def test_graph_lasso_cv(random_state=1):
    """Smoke test the verbose output path of ``GraphLassoCV``.

    Fits the estimator on a handful of samples from a sparse multivariate
    normal while stdout is temporarily swapped for a ``StringIO`` buffer.
    """
    # Problem size: few features, barely more observations
    n_features = 5
    n_obs = 6
    rng = check_random_state(random_state)

    sparse_precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                              random_state=rng)
    dense_covariance = linalg.inv(sparse_precision)
    mean = np.zeros(n_features)
    X = rng.multivariate_normal(mean, dense_covariance, size=n_obs)

    # Capture stdout for the duration of the fit; restore it unconditionally
    previous_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        GraphLassoCV(verbose=10, alphas=3).fit(X)
    finally:
        sys.stdout = previous_stdout
def test_graphical_lasso(random_state=0):
    """Compare the 'cd' and 'lars' modes of ``graphical_lasso``.

    For each tested ``alpha`` both modes are run on the empirical
    covariance of data sampled from a sparse multivariate normal.  The
    costs must decrease at every step (skipped for ``alpha == 0``) and the
    covariance / precision estimates of the two modes must agree to four
    decimals.  The ``GraphicalLasso`` estimator is then smoke tested
    against the results of the last ``alpha``, and fitting centered data
    with either value of ``assume_centered`` must give the same precision.
    """
    n_features, n_obs = 20, 100
    rng = check_random_state(random_state)

    # Sample data from a sparse multivariate normal
    true_precision = make_sparse_spd_matrix(n_features, alpha=.95,
                                            random_state=rng)
    true_covariance = linalg.inv(true_precision)
    X = rng.multivariate_normal(np.zeros(n_features), true_covariance,
                                size=n_obs)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs, icovs = {}, {}
        for method in ('cd', 'lars'):
            estimate, inverse_estimate, cost_history = graphical_lasso(
                emp_cov, return_costs=True, alpha=alpha, mode=method)
            covs[method] = estimate
            icovs[method] = inverse_estimate
            objective, _dual_gap = np.array(cost_history).T
            # The costs must always decrease, except when alpha == 0
            # where this does not hold
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Both solvers should land on similar estimates
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator against the results of the last alpha
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(model.covariance_, covs[method], decimal=4)

    # On centered data, assume_centered True or False must be equivalent
    Z = X - X.mean(0)
    precs = [
        GraphicalLasso(assume_centered=centered).fit(Z).precision_
        for centered in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
def test_graphical_lasso_cv(random_state=1):
    """Smoke test ``GraphicalLassoCV`` in verbose mode and with given alphas.

    The first fit runs with stdout redirected to an in-memory buffer so
    the verbose code path executes silently; the second fit passes an
    explicit list of alphas.
    """
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)

    # Sample data from a sparse multivariate normal
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_obs)

    # Verbose fit with stdout captured; stdout is restored in all cases
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        # Verbosity must be very high for Parallel to print on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = saved_stdout

    # Smoke test with specified alphas
    explicit_alphas = [0.8, 0.5]
    GraphicalLassoCV(alphas=explicit_alphas, tol=1e-1, n_jobs=1).fit(X)
def prof_graph_lasso_cv(random_state_seed=1):
    """Time a ``GraphLassoCV`` fit that uses a ``KFold`` splitter.

    Input data are sampled from a sparse multivariate normal seeded with
    ``random_state_seed`` and handed to ``utils.cache_value`` under a key
    built from the problem size and the seed.  Two earlier profiling steps
    (integer ``alphas`` and list ``alphas``) are kept below as disabled
    code; only the cross-validation-object fit is currently executed,
    wrapped in ``utils.timeit``.
    """
    # Sample data from a sparse multivariate normal
    dim = 10  # 80
    n_samples = 60

    # Generate input data
    rng = check_random_state(random_state_seed)
    precision = make_sparse_spd_matrix(dim, alpha=.96, random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(dim), covariance, size=n_samples)
    cache_key = "prof_graph_lasso_cv/X_%d_%d_%d" % (dim, n_samples,
                                                    random_state_seed)
    utils.cache_value(X, cache_key)

    # Test with alphas as integer
    ## mode = 'cd'
    ## gl1 = utils.timeit(GraphLassoCV(verbose=1, alphas=3, mode=mode).fit)(X)
    ## utils.cache_value(gl1.covariance_,
    ##                   "prof_graph_lasso_cv/covariance_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))
    ## utils.cache_value(gl1.precision_,
    ##                   "prof_graph_lasso_cv/precision_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))

    # Test with alphas as list.
    # Take same alphas as were found in the first step, check the result
    # is the same.
    ## gl2 = utils.timeit(GraphLassoCV(alphas=gl1.cv_alphas_, n_jobs=1,
    ##                                 mode=mode).fit)(X)
    ## np.testing.assert_almost_equal(gl1.covariance_, gl2.covariance_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.precision_, gl2.precision_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.alpha_, gl2.alpha_)

    # Smoke test with an alternate cross-validation object.
    timed_fit = utils.timeit(
        GraphLassoCV(cv=KFold(n=X.shape[0], n_folds=20), n_jobs=1).fit)
    gl3 = timed_fit(X)
def generate_cov_learn_dataset_repeat(n_signals=50, n_features=15,
                                      n_samples=100, alpha=0.95, repeats=10,
                                      random_state=0, verbose=True,
                                      normalize=False, laplace=False,
                                      permute_repeats=False,
                                      graphType="random",
                                      mix_with_random=False,
                                      smallest_coef=-.9):
    """Generate (true covariance, true precision, empirical covariance,
    signals) tuples for covariance-learning experiments.

    ``n_samples`` precision matrices are generated; for each of them
    ``repeats`` signal sets of ``n_signals`` observations are drawn and
    their empirical covariance ``X.T.dot(X) / n_signals`` is computed.

    Parameters
    ----------
    n_signals : int
        Number of observations drawn per repeat.
    n_features : int
        Dimension of the precision / covariance matrices.
    n_samples : int
        Number of distinct precision matrices.
    alpha : float
        Forwarded as ``alpha`` to ``make_sparse_spd_matrix``.
    repeats : int
        Number of signal sets drawn for each precision matrix.
    random_state : int
        Integer offset: ``i + random_state`` seeds the sparse precision
        and ``i + j + random_state + 1`` seeds the Gaussian signals.
    verbose : bool
        When true, write a ``\\r``-refreshed percentage to stdout after
        each precision matrix.
    normalize : bool
        When true, center each column of the signals and scale it by its
        standard deviation (columns with zero deviation are left unscaled).
    laplace : bool
        When true, draw signals as ``sqrt(E) * Z`` with exponential ``E``
        and Gaussian ``Z`` instead of calling
        ``generate_signal_from_covariance``.
    permute_repeats : bool
        When true, apply a fresh random feature permutation to every
        repeat.
    graphType : str
        ``'smallWorld'`` builds the precision from
        ``rags2ridges.createS``; any other value uses
        ``make_sparse_spd_matrix``.
    mix_with_random : bool
        With ``graphType='smallWorld'``, use a random sparse precision
        instead of a small-world one with probability one half.
    smallest_coef : float
        Forwarded as ``smallest_coef`` to ``make_sparse_spd_matrix``.

    Returns
    -------
    true_covariances, true_precisions, noised_covariances, sigs : lists
        Four lists with ``n_samples * repeats`` entries each, aligned by
        position.

    Notes
    -----
    The Laplace draws, the small-world shuffling, the random/small-world
    coin flip and the repeat permutations all use the global ``np.random``
    state, so they are not controlled by ``random_state``.

    With ``permute_repeats=True`` the same feature permutation is applied
    to the signals, the empirical covariance, the true covariance and the
    true precision.  Previously the empirical covariance was permuted with
    ``P.T.dot(C).dot(P)`` while the true matrices used
    ``P.dot(C).dot(P.T)`` (the inverse permutation) and the signals were
    left unpermuted, so the returned targets did not match the inputs.
    """
    true_covariances = []
    true_precisions = []
    noised_covariances = []
    sigs = []
    # Both index arrays are shuffled in place and persist across iterations.
    ind = np.arange(0, n_features)
    ind2 = ind.copy()
    upper = np.triu_indices(n_features, k=1)

    for i in range(n_samples):
        # The coin is only flipped when it can matter, exactly as before.
        use_small_world = graphType == 'smallWorld' and not (
            mix_with_random and np.random.rand(1)[0] < 0.5)

        if use_small_world:
            # NOTE(review): presumably returns a small-world precision
            # matrix -- confirm against the rags2ridges wrapper.
            data = rags2ridges.createS(10, n_features,
                                       topology="small-world",
                                       precision=True)
            prec = np.array(data)
            np.random.shuffle(ind2)
            eye = np.eye(prec.shape[1])
            perm = eye[:, ind2]
            # Keep only the strict upper triangle, mirror it, shuffle the
            # node order and put ones on the diagonal.
            sym = np.zeros((prec.shape[1], prec.shape[1]))
            sym[upper] = prec[upper]
            sym = sym + sym.T
            prec = perm.dot(sym).dot(perm.T) + eye
        else:
            prec = make_sparse_spd_matrix(n_features, alpha=alpha,
                                          smallest_coef=smallest_coef,
                                          random_state=i + random_state)

        cov = np.linalg.inv(prec)

        for j in range(repeats):
            if laplace:
                # see prop 3.1 in "A multivariate generalization of the
                # power exponential family of distributions",
                # E. Gomez et al 1998
                E = np.tile(np.random.exponential(scale=1.0, size=n_signals),
                            (n_features, 1)).T
                Z = np.random.multivariate_normal(np.zeros(n_features), cov,
                                                  n_signals)
                X = (E) ** (0.5) * Z
            else:
                X = generate_signal_from_covariance(
                    cov, samples=n_signals,
                    random_state=i + j + random_state + 1)

            if normalize:
                X -= X.mean(axis=0)
                std = X.std(axis=0)
                std[std == 0] = 1  # avoid dividing constant columns by zero
                X /= std

            cov_emp = X.T.dot(X) / X.shape[0]

            if permute_repeats:
                np.random.shuffle(ind)
                # One permutation for every output: entry (a, b) of each
                # permuted matrix is entry (ind[a], ind[b]) of the
                # original, matching the reordered signal columns.
                order = np.ix_(ind, ind)
                X = X[:, ind]
                cov_emp = cov_emp[order]
                true_covariances.append(cov[order])
                true_precisions.append(prec[order])
            else:
                true_covariances.append(cov)
                true_precisions.append(prec)

            noised_covariances.append(cov_emp)
            sigs.append(X)

        if verbose:
            sys.stdout.write("\r%.2f%%" % (float(i * 100) / n_samples))
            sys.stdout.flush()

    return true_covariances, true_precisions, noised_covariances, sigs