import numpy as np
from nose.tools import assert_equal, assert_raises, assert_true

from nilearn._utils.testing import generate_group_sparse_gaussian_graphs
from nilearn.group_sparse_covariance import (GroupSparseCovariance,
                                             GroupSparseCovarianceCV,
                                             group_sparse_covariance,
                                             group_sparse_scores)


def test_group_sparse_covariance():
    # Run in debug mode (extra consistency checks must not fail); without
    # debug mode, the cost must decrease at each iteration.

    signals, _, _ = generate_group_sparse_gaussian_graphs(
        density=0.1, n_subjects=5, n_features=10,
        min_n_samples=100, max_n_samples=151,
        random_state=np.random.RandomState(0))

    alpha = 0.1

    # These executions must hit the tolerance limit
    emp_covs, omega = group_sparse_covariance(signals, alpha, max_iter=20,
                                              tol=1e-2, debug=True, verbose=0)
    emp_covs, omega2 = group_sparse_covariance(signals, alpha, max_iter=20,
                                               tol=1e-2, debug=True, verbose=0)

    np.testing.assert_almost_equal(omega, omega2, decimal=4)

    class Probe(object):
        def __init__(self):
            self.objective = []

        def __call__(self, emp_covs, n_samples, alpha, max_iter, tol, n, omega,
                     omega_diff):
            if n >= 0:
                _, objective = group_sparse_scores(omega, n_samples, emp_covs,
                                                   alpha)
                self.objective.append(objective)

    # Use a probe to test for number of iterations and decreasing objective.
    probe = Probe()
    emp_covs, omega = group_sparse_covariance(
        signals, alpha, max_iter=4, tol=None, verbose=0, probe_function=probe)
    objective = probe.objective
    # check number of iterations
    assert_equal(len(objective), 4)

    # np.testing.assert_array_less is a strict comparison.
    # Zeros can occur in np.diff(objective).
    assert_true(np.all(np.diff(objective) <= 0))
    assert_equal(omega.shape, (10, 10, 5))

    # Test input argument checking
    assert_raises(ValueError, group_sparse_covariance, signals, "")
    assert_raises(ValueError, group_sparse_covariance, 1, alpha)
    assert_raises(ValueError, group_sparse_covariance,
                  [np.ones((2, 2)), np.ones((2, 3))], alpha)

    # Check consistency between classes
    gsc1 = GroupSparseCovarianceCV(alphas=4, tol=1e-1, max_iter=20, verbose=0,
                                   early_stopping=True)
    gsc1.fit(signals)

    gsc2 = GroupSparseCovariance(alpha=gsc1.alpha_, tol=1e-1, max_iter=20,
                                 verbose=0)
    gsc2.fit(signals)

    np.testing.assert_almost_equal(gsc1.precisions_, gsc2.precisions_,
                                   decimal=4)
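    # The probe hook can also record the iterates themselves, e.g. to inspect
    # convergence. A minimal sketch with the same call signature as Probe
    # (IterateRecorder is illustrative, not part of the library); the
    # `if n >= 0` guard mirrors Probe above, which suggests the probe is also
    # invoked once before the first iteration.
    class IterateRecorder(object):
        def __init__(self):
            self.omegas = []

        def __call__(self, emp_covs, n_samples, alpha, max_iter, tol, n,
                     omega, omega_diff):
            if n >= 0:
                self.omegas.append(omega.copy())

    recorder = IterateRecorder()
    group_sparse_covariance(signals, alpha, max_iter=4, tol=None, verbose=0,
                            probe_function=recorder)
    assert_equal(len(recorder.omegas), 4)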


import glob
import os
import pickle

import numpy as np

from nilearn._utils import testing


def create_signals(parameters, output_dir="tmp_signals"):
    """Generate ground truth and signals, with a simple on-disk cache.

    Parameters
    ----------
    parameters : dict
        Mandatory keys: n_var, n_tasks, density.
        Optional keys: min_samples, max_samples, normalize (default True).
    """
    # get_cache_dir is an external helper (not shown in this excerpt) mapping
    # a parameter set to a directory name, or None to disable caching.
    cache_dir = get_cache_dir(parameters, output_dir)

    next_num = 0
    if cache_dir is not None:
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)

        else:
            filenames = glob.glob(os.path.join(cache_dir,
                                               "precisions_*.pickle"))
            numbers = [int(os.path.basename(fname)
                           .rsplit(".")[0]
                           .split("_")[1])
                       for fname in filenames]

            if len(numbers) > 0:
                next_num = max(numbers) + 1

    # Look for/create true precisions, topology and signals
    ground_truth_fname = None
    if cache_dir is not None:
        ground_truth_fname = os.path.join(cache_dir, "ground_truth.pickle")
    if ground_truth_fname is None or not os.path.isfile(ground_truth_fname):
        rand_gen = np.random.RandomState(0)
        min_samples = parameters.get("min_samples", 100)
        max_samples = parameters.get("max_samples", 150)
        # Generate signals
        signals, precisions, topology = \
            testing.generate_group_sparse_gaussian_graphs(
                n_subjects=parameters["n_tasks"],
                n_features=parameters["n_var"],
                density=parameters["density"], random_state=rand_gen,
                min_n_samples=min_samples, max_n_samples=max_samples)

        if parameters.get("normalize", True):
            for signal in signals:
                signal /= signal.std(axis=0)
        gt = {"precisions": precisions,
              "topology": topology,
              "signals": signals}
        if cache_dir is not None:
            with open(ground_truth_fname, "wb") as f:
                pickle.dump(gt, f)

    if cache_dir is not None:
        with open(ground_truth_fname, "rb") as f:
            gt = pickle.load(f)

    return next_num, cache_dir, gt
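

# A usage sketch for the cache above (parameter values are arbitrary; the
# mandatory and optional keys follow the docstring):
params = {"n_var": 10, "n_tasks": 5, "density": 0.1,
          "min_samples": 100, "max_samples": 150}
next_num, cache_dir, gt = create_signals(params, output_dir="tmp_signals")
print("next run number: %d, cache dir: %s" % (next_num, cache_dir))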


import os
import pickle
import time

import joblib
import numpy as np

from nilearn._utils import testing
from nilearn.group_sparse_covariance import GroupSparseCovarianceCV


def cv_object_study(early_stopping=True, output_dir="_early_stopping"):
    """Convenience function for running GroupSparseCovarianceCV."""
    parameters = {'n_tasks': 10, 'tol': 1e-3, 'max_iter': 50, "n_jobs": 7,
                  "cv": 4}
    parameters["tol_cv"] = parameters["tol"]
    parameters["max_iter_cv"] = parameters["max_iter"]

    # Toggle between synthetic signals and cached region signals.
    synthetic = False

    print("-- Getting signals")
    if synthetic:
        parameters["n_features"] = 50
        parameters["density"] = 0.2
        signals, _, _ = testing.generate_group_sparse_gaussian_graphs(
            n_subjects=parameters["n_tasks"],
            n_features=parameters["n_features"],
            min_n_samples=100, max_n_samples=150,
            density=parameters["density"])
    else:
        mem = joblib.Memory(".")
        signals = []
        for n in range(parameters["n_tasks"]):
            # region_signals is an external helper (not shown in this
            # excerpt) returning the signals for subject n.
            signals.append(mem.cache(region_signals)(n))

    print("-- Optimizing")
    gsc = GroupSparseCovarianceCV(early_stopping=early_stopping,
                                  cv=parameters["cv"],
                                  n_jobs=parameters["n_jobs"],
                                  tol=parameters["tol"],
                                  tol_cv=parameters["tol_cv"],
                                  max_iter=parameters["max_iter"],
                                  max_iter_cv=parameters["max_iter_cv"],
                                  verbose=1)
    t0 = time.time()
    gsc.fit(signals)
    t1 = time.time()
    print("\nTime spent in fit(): %.1f s" % (t1 - t0))
    print("\n-- selected alpha: %.3e" % gsc.alpha_)
    print("-- cv_alphas_:")
    print(repr(np.asarray(gsc.cv_alphas_)))
    print("-- cv_scores_:")
    print(repr(np.asarray(gsc.cv_scores_)))

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    out_filename = os.path.join(output_dir, "cv_object_study.pickle")
    with open(out_filename, "wb") as f:
        pickle.dump([gsc.alpha_, gsc.cv_alphas_, gsc.cv_scores_,
                     gsc.covariances_, gsc.precisions_], f)
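

# Reading the dump back (field order matches the pickle.dump call above;
# this helper is added here for illustration):
def load_cv_object_study(output_dir="_early_stopping"):
    with open(os.path.join(output_dir, "cv_object_study.pickle"), "rb") as f:
        alpha_, cv_alphas_, cv_scores_, covariances_, precisions_ = \
            pickle.load(f)
    return alpha_, cv_alphas_, cv_scores_, covariances_, precisions_
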
import matplotlib.pyplot as plt


def plot_matrix(m, ylabel=""):
    abs_max = abs(m).max()
    plt.imshow(m, cmap=plt.cm.RdBu_r, interpolation="nearest",
               vmin=-abs_max, vmax=abs_max)
    if ylabel:
        plt.ylabel(ylabel)


# Generate synthetic data
from nilearn._utils.testing import generate_group_sparse_gaussian_graphs

n_subjects = 20  # number of subjects
n_displayed = 3  # number of subjects displayed
subjects, precisions, topology = generate_group_sparse_gaussian_graphs(
    n_subjects=n_subjects, n_features=10, min_n_samples=30, max_n_samples=50,
    density=0.1)

fig = plt.figure(figsize=(10, 7))
plt.subplots_adjust(hspace=0.4)
for n in range(n_displayed):
    plt.subplot(n_displayed, 4, 4 * n + 1)
    plot_matrix(precisions[n])
    if n == 0:
        plt.title("ground truth")
    plt.ylabel("subject %d" % n)


# Run group-sparse covariance on all subjects
from nilearn.group_sparse_covariance import GroupSparseCovarianceCV
gsc = GroupSparseCovarianceCV(max_iter=50, verbose=1)
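# A minimal continuation (the snippet ends before fitting), assuming the
# `subjects` signals generated above: fit, then plot each estimated precision
# next to its ground truth.
gsc.fit(subjects)

for n in range(n_displayed):
    plt.subplot(n_displayed, 4, 4 * n + 2)
    plot_matrix(gsc.precisions_[..., n])
    if n == 0:
        plt.title("group-sparse estimate")

plt.show()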


import joblib
import numpy as np
import pylab as pl

from nilearn._utils import testing
# Import paths below are assumed to match the snippets above.
from nilearn.group_sparse_covariance import (compute_alpha_max,
                                             empirical_covariances,
                                             group_sparse_covariance,
                                             group_sparse_scores)


def benchmark1():
    """Plot different quantities for varying alpha."""
    # Signals
    min_samples, max_samples = 100, 150  # train signals length
    n_var = 50
    n_tasks = 40
    density = 0.1
    random_state = np.random.RandomState(0)

    test_samples = 4000  # number of samples for test signals

    # Estimation
    n_alphas = 10
    max_iter = 200
    tol = 1e-3

    # Generate signals
    signals, precisions, topology = \
        testing.generate_group_sparse_gaussian_graphs(
            n_subjects=n_tasks, n_features=n_var, density=density,
            random_state=random_state, min_n_samples=min_samples,
            max_n_samples=max_samples)

    emp_covs, n_samples = empirical_covariances(signals)

    # Estimate precision matrices
    alpha_1, _ = compute_alpha_max(emp_covs, n_samples)
    alpha_0 = 1e-2 * alpha_1
    ## alpha_1 = 0.067
    ## alpha_0 = 0.044

    alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]

    # Fit one model per alpha, in parallel. The code below expects one dict
    # per alpha, so wrap the raw solver output accordingly (it is assumed
    # here that the solver, given precomputed covariances, returns the stack
    # of precision matrices).
    precisions_list = joblib.Parallel(n_jobs=7, verbose=1)(
        joblib.delayed(group_sparse_covariance)(emp_covs, n_samples, alpha,
                                                max_iter=max_iter, tol=tol)
        for alpha in alphas)
    parameters = [{"alpha": alpha, "precisions": prec}
                  for alpha, prec in zip(alphas, precisions_list)]

    # Compute scores
    test_signals = testing.generate_signals_from_precisions(
        precisions, min_n_samples=test_samples, max_n_samples=test_samples + 1,
        random_state=random_state)

    test_emp_covs, _ = empirical_covariances(test_signals)
    del test_signals

    for params in parameters:
        params["ll_score"], params["pen_score"] = group_sparse_scores(
            params["precisions"], n_samples, test_emp_covs, params["alpha"])

    # Plot graphs
    alpha, ll_score, pen_score = get_series(
        parameters, ("alpha", "ll_score", "pen_score"))
    non_zero = [(p["precisions"][..., 0] != 0).sum() for p in parameters]

    pl.figure()
    pl.semilogx(alpha, ll_score, "-+", label="log-likelihood")
    pl.semilogx(alpha, pen_score, "-+", label="penalized LL")
    pl.xlabel("alpha")
    pl.ylabel("score")
    pl.legend()
    pl.grid()

    pl.figure()
    pl.semilogx(alpha, non_zero, "-+")
    pl.xlabel("alpha")
    pl.ylabel("non_zero")
    pl.grid()

    pl.figure()
    pl.loglog(alpha, non_zero, "-+")
    pl.xlabel("alpha")
    pl.ylabel("non_zero")
    pl.grid()

    pl.figure()
    pl.imshow(topology, interpolation="nearest")
    pl.title("true topology")

    ## precisions = get_series(parameters, ("precisions", ))
    ## for prec, alpha in zip(precisions, alpha):
    ##     pl.figure()
    ##     pl.imshow(prec[..., 0] != 0, interpolation="nearest")
    ##     pl.title(alpha)

    pl.show()
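

# get_series, used in benchmark1 above, is not defined in this excerpt. A
# minimal sketch of what it is assumed to do: for each requested key, collect
# the values across the per-alpha result dicts.
def get_series(params, keys):
    return [[p[key] for p in params] for key in keys]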