Esempio n. 1
0
def test_graph_lasso(random_state=0):
    """Check ``graph_lasso`` with both solvers and several penalties.

    For every penalty ``alpha`` the 'cd' and 'lars' solvers are run on the
    same empirical covariance. The objective must decrease at every
    iteration and both solvers must agree on the estimated covariance.
    Finally the ``GraphLasso`` estimator is compared against the function
    output obtained with the same penalty.

    Parameters
    ----------
    random_state : int or RandomState, default=0
        Seed (or generator) handed to ``check_random_state``.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    # Penalty used by the estimator smoke test below. It is iterated last so
    # that ``covs`` still holds the matching solver outputs after the loop.
    estimator_alpha = .1
    for alpha in (.01, estimator_alpha):
        covs = dict()
        for method in ('cd', 'lars'):
            # Forward the loop variables: without ``alpha``/``mode`` every
            # iteration would silently run the same default configuration.
            cov_, _, costs = graph_lasso(emp_cov, alpha=alpha, mode=method,
                                         return_costs=True)
            covs[method] = cov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease
            assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results; two different
        # solvers only agree up to their convergence tolerance.
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphLasso(alpha=estimator_alpha).fit(X)
    assert_array_almost_equal(model.covariance_, covs['cd'])
Esempio n. 2
0
def test_graph_lasso(random_state=0):
    """Exercise ``graph_lasso`` for each solver mode and penalty value.

    The 'cd' and 'lars' modes are both run for every ``alpha``. Each run
    must show a strictly decreasing objective, and the two modes must give
    similar covariance estimates. The ``GraphLasso`` estimator is then
    checked against the function result computed with the same penalty.

    Parameters
    ----------
    random_state : int or RandomState, default=0
        Seed (or generator) handed to ``check_random_state``.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    # The estimator below is fitted with this penalty, so it must be the
    # last value of the loop for ``covs`` to be comparable afterwards.
    estimator_alpha = .1
    for alpha in (.01, estimator_alpha):
        covs = dict()
        for method in ('cd', 'lars'):
            # ``alpha`` and ``mode`` must be passed explicitly, otherwise the
            # loops would repeat one identical default computation.
            cov_, _, costs = graph_lasso(emp_cov, alpha=alpha, mode=method,
                                         return_costs=True)
            covs[method] = cov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease
            assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results (up to the
        # convergence tolerance of two distinct solvers)
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphLasso(alpha=estimator_alpha).fit(X)
    assert_array_almost_equal(model.covariance_, covs['cd'])
Esempio n. 3
0
def mk_spd(dim, alpha, maxc, minc, rs):
    """Generate a matrix with ``make_sparse_spd_matrix`` as nested lists.

    ``maxc`` and ``minc`` are forwarded as ``largest_coef`` and
    ``smallest_coef``; ``rs`` is forwarded as ``random_state``.

    Returns
    -------
    list
        A one-element list whose single item is the generated matrix
        converted with ``tolist()``.
    """
    matrix = make_sparse_spd_matrix(dim=dim, alpha=alpha, largest_coef=maxc,
                                    smallest_coef=minc, random_state=rs)
    # Convert to plain Python lists and wrap in an outer list
    return [matrix.tolist()]
Esempio n. 4
0
def test_graph_lasso_cv(random_state=1):
    """Smoke test ``GraphLassoCV`` in verbose mode on a tiny data set."""
    # Draw a handful of samples from a sparse multivariate normal
    n_features, n_draws = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_draws)

    # Swap stdout for an in-memory buffer while fitting so the verbose
    # output is exercised without reaching the real stream.
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # verbose must be very high for Parallel to print on stdout
        GraphLassoCV(verbose=100, alphas=3).fit(X)
    finally:
        sys.stdout = saved_stdout
Esempio n. 5
0
def test_deprecated_grid_scores(random_state=1):
    """``grid_scores`` must warn about deprecation and equal ``grid_scores_``."""
    n_features, n_draws = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_draws)
    estimator = GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    estimator.fit(X)

    # Text the deprecation warning is expected to match
    expected_warning = ("Attribute grid_scores was deprecated in version "
                        "0.19 and will be removed in 0.21. Use "
                        "``grid_scores_`` instead")

    with pytest.warns(DeprecationWarning, match=expected_warning):
        assert_equal(estimator.grid_scores, estimator.grid_scores_)
def test_graph_lasso_cv(random_state=1):
    """Fit ``GraphLassoCV`` verbosely while stdout is captured."""
    # Sample a few observations from a sparse multivariate normal
    n_dims = 5
    sample_count = 6
    generator = check_random_state(random_state)
    sparse_prec = make_sparse_spd_matrix(n_dims, alpha=.96,
                                         random_state=generator)
    X = generator.multivariate_normal(np.zeros(n_dims),
                                      linalg.inv(sparse_prec),
                                      size=sample_count)

    # Redirect stdout to a throwaway buffer to smoke test the verbose mode
    real_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        GraphLassoCV(verbose=10, alphas=3).fit(X)
    finally:
        # Always put the real stream back, even if fitting fails
        sys.stdout = real_stdout
def test_deprecated_grid_scores(random_state=1):
    """Check the deprecation warning raised by the ``grid_scores`` attribute."""
    n_dims = 5
    sample_count = 6
    generator = check_random_state(random_state)
    sparse_prec = make_sparse_spd_matrix(n_dims, alpha=.96,
                                         random_state=generator)
    X = generator.multivariate_normal(np.zeros(n_dims),
                                      linalg.inv(sparse_prec),
                                      size=sample_count)
    fitted = GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    fitted.fit(X)

    warning_pattern = ("Attribute grid_scores was deprecated in version "
                       "0.19 and will be removed in 0.21. Use "
                       "``grid_scores_`` instead")

    # The old attribute must still return the same values as the new one
    with pytest.warns(DeprecationWarning, match=warning_pattern):
        assert_equal(fitted.grid_scores, fitted.grid_scores_)
def test_graphical_lasso(random_state=0):
    """Compare the 'cd' and 'lars' modes of ``graphical_lasso``.

    Also smoke tests the ``GraphicalLasso`` estimator and checks that
    ``assume_centered`` makes no difference on already-centered data.
    """
    # Draw observations from a sparse multivariate normal
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_prec = make_sparse_spd_matrix(n_features, alpha=.95,
                                       random_state=rng)
    true_cov = linalg.inv(true_prec)
    X = rng.multivariate_normal(np.zeros(n_features), true_cov, size=n_draws)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            covs[method], icovs[method], costs = graphical_lasso(
                emp_cov, return_costs=True, alpha=alpha, mode=method)
            objective, _ = np.array(costs).T
            # The objective must go down at every step, except when
            # alpha == 0 where this does not hold
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Both modes are expected to reach similar solutions
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator against the last penalty of the loop
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(model.covariance_, covs[method], decimal=4)

    # On centered data, assume_centered=True and assume_centered=False
    # must lead to the same precision matrix
    Z = X - X.mean(0)
    precs = [GraphicalLasso(assume_centered=flag).fit(Z).precision_
             for flag in (False, True)]
    assert_array_almost_equal(precs[0], precs[1])
def test_graphical_lasso(random_state=0):
    """Run ``graphical_lasso`` in both modes and check the results agree.

    The estimator class is smoke tested afterwards, including the
    equivalence of both ``assume_centered`` settings on centered data.
    """
    # Sample data from a sparse multivariate normal
    n_dims, sample_count = 20, 100
    generator = check_random_state(random_state)
    sparse_prec = make_sparse_spd_matrix(n_dims, alpha=.95,
                                         random_state=generator)
    X = generator.multivariate_normal(np.zeros(n_dims),
                                      linalg.inv(sparse_prec),
                                      size=sample_count)
    emp_cov = empirical_covariance(X)

    modes = ('cd', 'lars')
    for alpha in (0., .1, .25):
        covs, icovs = dict(), dict()
        for method in modes:
            result = graphical_lasso(emp_cov, return_costs=True,
                                     alpha=alpha, mode=method)
            covs[method], icovs[method] = result[0], result[1]
            cost_values, dual_gaps = np.array(result[2]).T
            if alpha == 0:
                # Monotone decrease of the costs doesn't hold if alpha == 0
                continue
            assert_array_less(np.diff(cost_values), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For centered data the value of assume_centered should not matter:
    # fit once with each setting and compare the precision matrices
    Z = X - X.mean(0)
    prec_not_assumed = GraphicalLasso(assume_centered=False).fit(Z).precision_
    prec_assumed = GraphicalLasso(assume_centered=True).fit(Z).precision_
    assert_array_almost_equal(prec_not_assumed, prec_assumed)
def test_graphical_lasso_cv(random_state=1):
    """Smoke test ``GraphicalLassoCV`` with integer and explicit ``alphas``."""
    # Draw a few observations from a sparse multivariate normal
    n_features, n_draws = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_draws)

    # Replace stdout with a buffer to smoke test the verbose mode
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # verbose must be very high for Parallel to print on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = saved_stdout

    # Second smoke test, this time with an explicit list of alphas
    explicit_alphas = [0.8, 0.5]
    GraphicalLassoCV(alphas=explicit_alphas, tol=1e-1, n_jobs=1).fit(X)
def prof_graph_lasso_cv(random_state_seed=1):
    """Run ``GraphLassoCV.fit`` through ``utils.timeit`` on synthetic data.

    The generated data matrix is handed to ``utils.cache_value`` under a key
    built from the dimension, the sample count and the seed. Only the run
    with an explicit ``KFold`` object is active; the earlier steps are kept
    below as disabled code.
    """
    # Sample data from a sparse multivariate normal
    n_features = 10  # 80
    n_draws = 60

    # Generate input data
    rng = check_random_state(random_state_seed)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance,
                                size=n_draws)

    cache_key = ("prof_graph_lasso_cv/X_%d_%d_%d" %
                 (n_features, n_draws, random_state_seed))
    utils.cache_value(X, cache_key)

    # Disabled step: test with alphas as integer
    ## mode = 'cd'
    ## gl1 = utils.timeit(GraphLassoCV(verbose=1, alphas=3, mode=mode).fit)(X)
    ## utils.cache_value(gl1.covariance_,
    ##                   "prof_graph_lasso_cv/covariance_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))
    ## utils.cache_value(gl1.precision_,
    ##                   "prof_graph_lasso_cv/precision_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))

    # Disabled step: test with alphas as list.
    # Take same alphas as were found in the first step, check the result
    # is the same.
    ## gl2 = utils.timeit(GraphLassoCV(alphas=gl1.cv_alphas_, n_jobs=1,
    ##                                 mode=mode).fit)(X)
    ## np.testing.assert_almost_equal(gl1.covariance_, gl2.covariance_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.precision_, gl2.precision_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.alpha_, gl2.alpha_)

    # Smoke test with an alternate cross-validation object.
    timed_fit = utils.timeit(
        GraphLassoCV(cv=KFold(n=X.shape[0], n_folds=20), n_jobs=1).fit)
    timed_fit(X)
Esempio n. 12
0
def generate_cov_learn_dataset_repeat(n_signals=50,
                                      n_features=15,
                                      n_samples=100,
                                      alpha=0.95,
                                      repeats=10,
                                      random_state=0,
                                      verbose=True,
                                      normalize=False,
                                      laplace=False,
                                      permute_repeats=False,
                                      graphType="random",
                                      mix_with_random=False,
                                      smallest_coef=-.9):
    """Generate (true precision, empirical covariance) pairs with repeats.

    ``n_samples`` precision matrices are generated. For each of them,
    ``repeats`` signal matrices are drawn from the corresponding covariance
    and their empirical covariance ``X.T @ X / n_signals`` is computed.

    Parameters
    ----------
    n_signals : int
        Number of observations (rows) drawn for every signal matrix.
    n_features : int
        Dimension of the precision / covariance matrices.
    n_samples : int
        Number of distinct precision matrices to generate.
    alpha : float
        Forwarded to ``make_sparse_spd_matrix``.
    repeats : int
        Number of signal matrices drawn per precision matrix.
    random_state : int
        Base seed; ``i + random_state`` seeds the i-th precision matrix and
        ``i + j + random_state + 1`` the j-th signal drawn from it. The
        Laplace draws, the permutations and the small-world/random mixing
        use the global ``np.random`` state and are NOT controlled by it.
    verbose : bool
        Write a progress percentage to stdout after each precision matrix.
    normalize : bool
        Center every column of the signals and scale it to unit standard
        deviation (constant columns are left unscaled).
    laplace : bool
        Multiply Gaussian draws by the square root of an exponential
        variable (one per observation) instead of calling
        ``generate_signal_from_covariance``.
    permute_repeats : bool
        Apply a fresh random relabelling of the variables to every repeat.
        The same relabelling is applied to the signals, the empirical
        covariance, the true covariance and the true precision.
    graphType : str
        ``'smallWorld'`` builds the precision from
        ``rags2ridges.createS(..., topology="small-world")``; any other
        value uses ``make_sparse_spd_matrix``.
    mix_with_random : bool
        With ``graphType == 'smallWorld'``, replace the small-world graph
        by a ``make_sparse_spd_matrix`` one with probability 0.5.
    smallest_coef : float
        Forwarded to ``make_sparse_spd_matrix``.

    Returns
    -------
    true_covariances, true_precisions, noised_covariances, sigs : list
        Four lists with one entry per (precision matrix, repeat) pair, i.e.
        ``n_samples * repeats`` entries each.
    """
    true_covariances = []
    true_precisions = []
    noised_covariances = []
    sigs = []
    identity = np.eye(n_features)
    ind = np.arange(0, n_features)
    ind2 = ind.copy()
    for i in range(n_samples):
        # The random draw only happens for small-world graphs with mixing
        # enabled, thanks to short-circuit evaluation.
        use_small_world = graphType == 'smallWorld' and not (
            mix_with_random and np.random.rand(1)[0] < 0.5)
        if use_small_world:
            # NOTE(review): meaning of the hard-coded first argument (10) of
            # createS is not visible here -- confirm against rags2ridges.
            data = rags2ridges.createS(10,
                                       n_features,
                                       topology="small-world",
                                       precision=True)
            prec = np.array(data)
            np.random.shuffle(ind2)
            identity = np.eye(prec.shape[1])
            P = identity[:, ind2]
            # Keep only the strict upper triangle, symmetrize it, relabel
            # the variables and put a unit diagonal back.
            upper = np.triu_indices(n_features, k=1)
            C = np.zeros((prec.shape[1], prec.shape[1]))
            C[upper] = prec[upper]
            C = C + C.T
            C = P.dot(C).dot(P.T)
            prec = C + identity
        else:
            prec = make_sparse_spd_matrix(n_features,
                                          alpha=alpha,
                                          smallest_coef=smallest_coef,
                                          random_state=i + random_state)

        cov = np.linalg.inv(prec)
        for j in range(repeats):
            if laplace:
                # see prop 3.1 in "A multivariate generalization of the power
                # exponential family of distributions" E. Gomez et al 1998
                E = np.tile(np.random.exponential(scale=1.0, size=n_signals),
                            (n_features, 1)).T
                Z = np.random.multivariate_normal(np.zeros(n_features), cov,
                                                  n_signals)
                X = (E)**(0.5) * Z
            else:
                X = generate_signal_from_covariance(cov,
                                                    samples=n_signals,
                                                    random_state=i + j +
                                                    random_state + 1)

            if normalize:
                X -= X.mean(axis=0)
                std = X.std(axis=0)
                std[std == 0] = 1  # avoid dividing constant columns by zero
                X /= std
            cov_emp = X.T.dot(X) / X.shape[0]
            if permute_repeats:
                np.random.shuffle(ind)
                P = identity[:, ind]
                # Use ONE convention (P . P^T) for every output. Permuting
                # the empirical covariance with P^T . P while the targets use
                # P . P^T applies two different (inverse) relabellings, so
                # inputs and targets would no longer describe the same
                # variables.
                X = X.dot(P.T)
                cov_emp = P.dot(cov_emp).dot(P.T)
                true_covariances.append(P.dot(cov).dot(P.T))
                true_precisions.append(P.dot(prec).dot(P.T))
            else:
                true_covariances.append(cov)
                true_precisions.append(prec)
            noised_covariances.append(cov_emp)
            sigs.append(X)
        if verbose:
            sys.stdout.write("\r%.2f%%" % (float(i * 100) / n_samples))
            sys.stdout.flush()
    return true_covariances, true_precisions, noised_covariances, sigs