def benchmark1(output_dir="_gsc_benchmark"):
    """Benchmark a full regularization path of group-sparse covariance.

    Runs save_group_sparse_covariance for a decreasing sequence of
    regularization parameters, from alpha_max down to alpha_max / 100.

    Parameters
    ----------
    output_dir : str
        directory passed to create_signals for caching signals/results.
        NOTE(review): the original body referenced an undefined name
        `output_dir` (NameError unless a module-level global exists);
        it is now a keyword argument — confirm the intended default.
    """
    parameters = dict(n_var=200, n_tasks=5, density=0.15, tol=1e-2,
                      n_alphas=5, max_iter=50, min_samples=100,
                      max_samples=150)
    next_num, cache_dir, gt = create_signals(parameters,
                                             output_dir=output_dir)
    emp_covs, n_samples = empirical_covariances(gt['signals'])

    # Explore alphas on a log scale between alpha_max (fully sparse
    # solution) and alpha_max / 100, largest first.
    max_alpha, _ = compute_alpha_max(emp_covs, n_samples)
    min_alpha = max_alpha / 100.
    print(min_alpha, max_alpha)
    alphas = np.logspace(np.log10(min_alpha), np.log10(max_alpha),
                         parameters['n_alphas'])[::-1]

    # n_jobs=1: keep runs sequential so timings are comparable.
    joblib.Parallel(n_jobs=1, verbose=1)(
        joblib.delayed(save_group_sparse_covariance)(
            emp_covs, n_samples, alpha, max_iter=parameters['max_iter'],
            tol=parameters['tol'], debug=False, cache_dir=cache_dir,
            num=num)
        for alpha, num in zip(alphas, itertools.count(next_num)))
def sample_precision_space(parameters, number=100):
    """Launch a large number of times the same estimation, with different
    starting points.

    Parameters
    ----------
    parameters : dict
        must contain at least the keys "alpha" and "tol", plus whatever
        create_signals requires.
    number : int
        number of samples to generate.
    """
    # Estimation
    max_iter = 200

    # Generate signals
    next_num, cache_dir, gt = create_signals(parameters,
                                             output_dir="_gsc_sensitivity")
    signals = gt["signals"]
    emp_covs, n_samples = empirical_covariances(signals)
    print("alpha max: %.3e" % compute_alpha_max(emp_covs, n_samples)[0])

    # Estimate a lot of precision matrices.
    # `range` replaces Python 2 `xrange` (the file already uses the
    # Python 3 print function). The Parallel result was previously bound
    # back onto `parameters` and discarded; use a separate name so the
    # input dict is not clobbered.
    _results = joblib.Parallel(n_jobs=7, verbose=1)(
        joblib.delayed(save_group_sparse_covariance)(
            emp_covs, n_samples, parameters["alpha"], max_iter=max_iter,
            tol=parameters["tol"], cache_dir=cache_dir, num=n)
        for n in range(next_num, next_num + number))
def benchmark1(output_dir="_prof_group_sparse_covariance"):
    """Run group_sparse_covariance on a simple case, for benchmarking."""
    params = dict(n_tasks=40, n_var=30, density=0.15,
                  alpha=.01, tol=1e-4, max_iter=50)
    _, _, gt = create_signals(params, output_dir=output_dir)

    _, estimated_precisions = utils.timeit(group_sparse_covariance)(
        gt["signals"], params['alpha'], max_iter=params['max_iter'],
        tol=params['tol'], verbose=1, debug=False)

    # Check that output doesn't change between invocations.
    target_file = os.path.join(output_dir, "benchmark1_est_precs.npy")
    utils.cache_array(estimated_precisions, target_file, decimal=4)
def benchmark(parameters, output_d="_convergence"):
    """Run _group_sparse_covariance once and record convergence metrics.

    Returns a dict of per-iteration arrays (log-likelihood, objective,
    precisions, duality gap, wall-clock time) and the ground truth.
    """
    _, _, gt = create_signals(parameters, output_dir=output_d)
    emp_covs, n_samples = empirical_covariances(gt["signals"])
    print("alpha_max: %.3e, %.3e" % compute_alpha_max(emp_covs, n_samples))

    probe = ScoreProbe(duality_gap=True)
    _group_sparse_covariance(
        emp_covs, n_samples, alpha=parameters["alpha"],
        tol=parameters["tol"], max_iter=parameters["max_iter"],
        probe_function=probe, verbose=1)

    # Convert every recorded trace to an ndarray before returning.
    traces = {"log_lik": probe.log_lik,
              "objective": probe.objective,
              "precisions": probe.precisions,
              "duality_gap": probe.duality_gap,
              "time": probe.wall_clock}
    return {key: np.asarray(value) for key, value in traces.items()}, gt
def benchmark2(output_dir="_prof_group_sparse_covariance"):
    """Run GroupSparseCovarianceCV on a simple case, for benchmarking."""
    # CV inner loop reuses the same tolerance and iteration cap as the
    # final fit.
    params = dict(n_tasks=40, n_var=10, density=0.15, alphas=4,
                  tol=1e-4, max_iter=50)
    params["tol_cv"] = params["tol"]
    params["max_iter_cv"] = params["max_iter"]

    _, _, gt = create_signals(params, output_dir=output_dir)

    estimator = GroupSparseCovarianceCV(alphas=params['alphas'],
                                        max_iter=params['max_iter'],
                                        tol=params['tol'],
                                        max_iter_cv=params['max_iter_cv'],
                                        tol_cv=params['tol_cv'],
                                        verbose=1, debug=False,
                                        early_stopping=True)
    utils.timeit(estimator.fit)(gt["signals"])
    print(estimator.alpha_)

    # Persist the estimated precisions for regression checks.
    filename = "est_precs_cv_{n_var:d}.npy".format(**params)
    utils.cache_array(estimator.precisions_,
                      os.path.join(output_dir, filename), decimal=3)
def lasso_gsc_comparison(output_dir="_lasso_gsc_comparison"):
    """Check that graph lasso and group-sparse covariance give the same
    output for a single task.

    Parameters
    ----------
    output_dir : str
        directory passed to create_signals for caching.
        NOTE(review): the original body referenced an undefined name
        `output_dir` (NameError unless a module-level global exists);
        it is now a keyword argument — confirm the intended default.
    """
    from sklearn.covariance import graph_lasso, empirical_covariance
    parameters = {'n_tasks': 1, 'n_var': 20, 'density': 0.15, 'rho': .2,
                  'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    # Group-sparse estimate on the single task.
    _, gsc_precision = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    # Graph-lasso estimate on the same (only) task.
    emp_cov = empirical_covariance(signals[0])
    _, gl_precision = utils.timeit(graph_lasso)(
        emp_cov, parameters['rho'], tol=parameters['tol'],
        max_iter=parameters['max_iter'])

    # Both solvers should agree to 4 decimal places.
    np.testing.assert_almost_equal(gl_precision, gsc_precision[..., 0],
                                   decimal=4)
def singular_cov_case(output_dir="_singular_cov_case"):
    """Check behaviour of algorithm for singular input matrix.

    With max_samples < n_var every empirical covariance is rank-deficient;
    verify that and run the solver anyway, reporting the sparsity obtained.

    Parameters
    ----------
    output_dir : str
        directory passed to create_signals for caching.
        NOTE(review): the original body referenced an undefined name
        `output_dir` (NameError unless a module-level global exists);
        it is now a keyword argument — confirm the intended default.
    """
    parameters = {'n_tasks': 10, 'n_var': 40, 'density': 0.15,
                  'rho': .1, 'tol': 1e-2, 'max_iter': 50,
                  'min_samples': 10, 'max_samples': 15}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]
    emp_covs, _ = empirical_covariances(signals)

    # Check that all covariance matrices are singular: the smallest
    # eigenvalue must be zero up to floating-point round-off.
    eps = np.finfo(float).eps
    for k in range(emp_covs.shape[-1]):
        eigvals = np.linalg.eigvalsh(emp_covs[..., k])
        assert abs(eigvals.min()) <= 50 * eps

    _, gsc_precisions = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    # Fraction of non-zero coefficients in the first estimated precision.
    print('found sparsity: {0:.3f}'
          ''.format(1. * (gsc_precisions[..., 0] != 0).sum()
                    / gsc_precisions.shape[0] ** 2))
def benchmark3(): """Compare group_sparse_covariance result for different initializations. """ ## parameters = {'n_tasks': 10, 'n_var': 50, 'density': 0.15, ## 'alpha': .001, 'tol': 1e-2, 'max_iter': 100} parameters = {'n_var': 40, 'n_tasks': 10, 'density': 0.15, 'alpha': .01, 'tol': 1e-3, 'max_iter': 100} mem = joblib.Memory(".") _, _, gt = create_signals(parameters, output_dir="_prof_group_sparse_covariance") signals = gt["signals"] emp_covs, n_samples = empirical_covariances(signals) print("alpha max: " + str(compute_alpha_max(emp_covs, n_samples))) # With diagonal elements initialization probe1 = ScoreProbe() est_precs1, probe1 = mem.cache(modified_gsc)(signals, parameters, probe1) probe1.comment = "diagonal" # set after execution for joblib not to see it probe1.plot() # With Ledoit-Wolf initialization ld = np.empty(emp_covs.shape) for k in range(emp_covs.shape[-1]): ld[..., k] = np.linalg.inv(ledoit_wolf(signals[k])[0]) probe1 = ScoreProbe() est_precs1, probe1 = utils.timeit(mem.cache(modified_gsc))( signals, parameters, probe=probe1) probe1.comment = "diagonal" # for joblib to ignore this value probe2 = ScoreProbe() parameters["precisions_init"] = ld est_precs2, probe2 = utils.timeit(mem.cache(modified_gsc))( signals, parameters, probe=probe2) probe2.comment = "ledoit-wolf" print("difference between final estimates (max norm) %.2e" % abs(est_precs1 - est_precs2).max()) pl.figure() pl.semilogy(probe1.timings[1:], probe1.max_norm, "+-", label=probe1.comment) pl.semilogy(probe2.timings[1:], probe2.max_norm, "+-", label=probe2.comment) pl.xlabel("Time [s]") pl.ylabel("Max norm") pl.grid() pl.legend(loc="best") pl.figure() pl.plot(probe1.timings, probe1.objective, "+-", label=probe1.comment) pl.plot(probe2.timings, probe2.objective, "+-", label=probe2.comment) pl.xlabel("Time [s]") pl.ylabel("objective") pl.grid() pl.legend(loc="best") pl.show()