Example #1
0
def model_average(X, penalization):
    '''Run ModelAverage in default mode (QuicGraphLassoCV) to obtain proportion
    matrix.

    NOTE:  This returns precision_ proportions, not cov, prec estimates, so we
           return the raw proportions for "cov" and the threshold support
           estimate for prec.

    Parameters
    ----------
    X : 2D array, shape (n_samples, n_features)
        Data used to fit the ensemble.
    penalization : str
        Penalization mode passed to ModelAverage (e.g. 'random',
        'subsampling').

    Returns
    -------
    (proportion_, support_, lam_) from the fitted ModelAverage model.
    '''
    n_trials = 100
    # Single-argument print() calls are valid under both Python 2 and 3,
    # matching the Python-3 style used elsewhere in this codebase.
    print('ModelAverage with:')
    print('   estimator: QuicGraphLasso (default)')
    print('   n_trials: {}'.format(n_trials))
    print('   penalization: {}'.format(penalization))

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around.  lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == 'random':
        # NOTE(review): `metric` is a module-level name not visible in this
        # snippet — confirm it is defined where this function lives.
        cv_model = QuicGraphLassoCV(cv=2,
                                    n_refinements=6,
                                    n_jobs=1,
                                    init_method='cov',
                                    score_metric=metric)
        cv_model.fit(X)
        lam = cv_model.lam_
        print('   lam: {}'.format(lam))

    model = ModelAverage(n_trials=n_trials,
                         penalization=penalization,
                         lam=lam,
                         n_jobs=1)
    model.fit(X)
    print('   lam_: {}'.format(model.lam_))
    return model.proportion_, model.support_, model.lam_
Example #2
0
    def test_integration_quic_graph_lasso_cv(self, params_in):
        '''
        Just tests inputs/outputs (not validity of result).
        '''
        n_features = 10
        n_samples = 10
        # Build a single-block cluster graph and draw Gaussian samples from
        # its covariance to get a small, reproducible data matrix.
        cov, prec, adj = ClusterGraph(
            n_blocks=1, chain_blocks=False, seed=1
        ).create(n_features, 0.8)
        prng = np.random.RandomState(2)
        X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

        ma = ModelAverage(**params_in)
        ma.fit(X)

        n_examples, n_features = X.shape

        # shape / bookkeeping checks on the fitted ensemble
        assert ma.proportion_.shape == (n_features, n_features)
        assert len(ma.estimators_) == ma.n_trials
        assert len(ma.subsets_) == ma.n_trials
        if ma.penalization == 'subsampling':
            assert len(ma.lams_) == ma.n_trials
            assert ma.lams_[0] is None
        else:
            assert len(ma.lams_) == ma.n_trials

        for idx, est in enumerate(ma.estimators_):
            assert isinstance(est, params_in['estimator'].__class__)

            # sklearn estimators don't expose this flag, but ours do
            if hasattr(est, 'is_fitted'):
                assert est.is_fitted == True

            # every trial after the first should have drawn a distinct lambda
            if ma.penalization != 'subsampling' and idx > 0:
                if hasattr(est, 'lam'):
                    previous = ma.estimators_[idx - 1]
                    assert np.linalg.norm((previous.lam - est.lam).flat) > 0

        # proportions are bounded by 1.0 when normalized, n_trials otherwise
        if ma.normalize == True:
            assert np.max(ma.proportion_) <= 1.0
        else:
            assert np.max(ma.proportion_) <= ma.n_trials

        assert np.min(ma.proportion_) >= 0.0
        assert np.max(ma.proportion_) > 0.0
Example #3
0
    def test_integration_quic_graph_lasso_cv(self, params_in):
        '''
        Just tests inputs/outputs (not validity of result).
        '''
        X = datasets.load_diabetes().data
        ma = ModelAverage(**params_in)
        ma.fit(X)

        n_examples, n_features = X.shape

        assert ma.proportion_.shape == (n_features, n_features)
        if ma.use_cache:
            # caching enabled: one estimator/subset (and lambda, unless
            # subsampling) must be retained per trial
            assert len(ma.estimators_) == ma.n_trials
            assert len(ma.subsets_) == ma.n_trials
            if ma.penalization == 'subsampling':
                assert len(ma.lams_) == 0
            else:
                assert len(ma.lams_) == ma.n_trials
        else:
            # caching disabled: no per-trial artifacts are kept
            assert len(ma.estimators_) == 0
            assert len(ma.lams_) == 0
            assert len(ma.subsets_) == 0

        for idx, est in enumerate(ma.estimators_):
            assert isinstance(est, params_in['estimator'].__class__)

            # sklearn estimators don't expose this flag, but ours do
            if hasattr(est, 'is_fitted'):
                assert est.is_fitted == True

            # every trial after the first should have drawn a distinct lambda
            if ma.penalization != 'subsampling' and idx > 0:
                if hasattr(est, 'lam'):
                    previous = ma.estimators_[idx - 1]
                    assert np.linalg.norm((previous.lam - est.lam).flat) > 0

        # proportions are bounded by 1.0 when normalized, n_trials otherwise
        if ma.normalize == True:
            assert np.max(ma.proportion_) <= 1.0
        else:
            assert np.max(ma.proportion_) <= ma.n_trials

        assert np.min(ma.proportion_) >= 0.0
        assert np.max(ma.proportion_) > 0.0
def model_average(X, penalization):
    """Run ModelAverage in default mode (QuicGraphicalLassoCV) to obtain proportion
    matrix.

    NOTE:  This returns precision_ proportions, not cov, prec estimates, so we
           return the raw proportions for "cov" and the threshold support
           estimate for prec.
    """
    n_trials = 100
    print("ModelAverage with:")
    print("   estimator: QuicGraphicalLasso (default)")
    print("   n_trials: {}".format(n_trials))
    print("   penalization: {}".format(penalization))

    # For 'random' penalization, first locate a reasonable scalar lam_ to
    # center the random perturbation matrix on; fully-random mode ignores lam.
    lam = 0.5
    if penalization == "random":
        selector = QuicGraphicalLassoCV(
            cv=2,
            n_refinements=6,
            sc=spark.sparkContext,  # NOQA
            init_method="cov",
            score_metric=metric,
        )
        selector.fit(X)
        lam = selector.lam_
        print("   lam: {}".format(lam))

    averager = ModelAverage(
        n_trials=n_trials,
        penalization=penalization,
        lam=lam,
        sc=spark.sparkContext,  # NOQA
    )
    averager.fit(X)
    print("   lam_: {}".format(averager.lam_))
    return averager.proportion_, averager.support_, averager.lam_
def adaptive_model_average(X, penalization, method):
    """Run ModelAverage in default mode (QuicGraphicalLassoCV) to obtain proportion
    matrix.

    NOTE:  Only method = 'binary' really makes sense in this case.
    """
    n_trials = 100
    print("Adaptive ModelAverage with:")
    print("   estimator: QuicGraphicalLasso (default)")
    print("   n_trials: {}".format(n_trials))
    print("   penalization: {}".format(penalization))
    print("   adaptive-method: {}".format(method))

    # For 'random' penalization, first locate a reasonable scalar lam_ to
    # center the random perturbation matrix on; fully-random mode ignores lam.
    lam = 0.5
    if penalization == "random":
        selector = QuicGraphicalLassoCV(
            cv=2,
            n_refinements=6,
            sc=spark.sparkContext,  # NOQA
            init_method="cov",
            score_metric=metric,
        )
        selector.fit(X)
        lam = selector.lam_
        print("   lam: {}".format(lam))

    # Wrap the averaged ensemble in an adaptive (two-stage) graphical lasso.
    base = ModelAverage(
        n_trials=n_trials,
        penalization=penalization,
        lam=lam,
        sc=spark.sparkContext,  # NOQA
    )
    adaptive = AdaptiveGraphicalLasso(estimator=base, method=method)
    adaptive.fit(X)
    lam_norm_ = np.linalg.norm(adaptive.estimator_.lam_)
    print("   ||lam_||_2: {}".format(lam_norm_))
    return adaptive.estimator_.covariance_, adaptive.estimator_.precision_, lam_norm_
Example #6
0
def adaptive_model_average(X, penalization, method):
    '''Run ModelAverage in default mode (QuicGraphLassoCV) to obtain proportion
    matrix.

    NOTE:  Only method = 'binary' really makes sense in this case.

    Parameters
    ----------
    X : 2D array, shape (n_samples, n_features)
        Data used to fit the ensemble.
    penalization : str
        Penalization mode for ModelAverage (e.g. 'random', 'subsampling').
    method : str
        Adaptive weighting method for AdaptiveGraphLasso ('binary' advised).

    Returns
    -------
    (covariance_, precision_, ||lam_||_2) from the fitted adaptive model.
    '''
    n_trials = 100
    # Single-argument print() calls are valid under both Python 2 and 3,
    # matching the Python-3 style used elsewhere in this codebase.
    print('Adaptive ModelAverage with:')
    print('   estimator: QuicGraphLasso (default)')
    print('   n_trials: {}'.format(n_trials))
    print('   penalization: {}'.format(penalization))
    print('   adaptive-method: {}'.format(method))

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around. lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == 'random':
        # NOTE(review): `metric` is a module-level name not visible in this
        # snippet — confirm it is defined where this function lives.
        cv_model = QuicGraphLassoCV(cv=2,
                                    n_refinements=6,
                                    n_jobs=1,
                                    init_method='cov',
                                    score_metric=metric)
        cv_model.fit(X)
        lam = cv_model.lam_
        print('   lam: {}'.format(lam))

    model = AdaptiveGraphLasso(
        estimator=ModelAverage(n_trials=n_trials,
                               penalization=penalization,
                               lam=lam,
                               n_jobs=1),
        method=method,
    )
    model.fit(X)
    lam_norm_ = np.linalg.norm(model.estimator_.lam_)
    print('   ||lam_||_2: {}'.format(lam_norm_))
    return model.estimator_.covariance_, model.estimator_.precision_, lam_norm_
    verbose=verbose,
)
ae.fit()
ae.show()
plt.suptitle('QuicGraphLassoEBIC (BIC)')

# average plots for QuicGraphLassoCV
# Benchmark section: average error of cross-validated QuicGraphLasso
# model selection over `n_trials` synthetic problems.
ae = AverageError(
    model_selection_estimator=QuicGraphLassoCV(),
    n_features=n_features,
    n_trials=n_trials,
    verbose=verbose,
)
ae.fit()
ae.show()
plt.suptitle('QuicGraphLassoCV')

# average plots for ModelAverage + CV
# Benchmark section: random-penalty ModelAverage ensemble (20 trials,
# perturbations centered on lam=0.2).
ae = AverageError(
    model_selection_estimator=ModelAverage(n_trials=20,
                                           penalization='random',
                                           lam=0.2),
    n_features=n_features,
    n_trials=n_trials,
    verbose=verbose,
)
ae.fit()
ae.show()
plt.suptitle('ModelAverage CV')

# Block until the user presses Enter so the plot windows stay open.
# NOTE(review): `raw_input` is Python-2 only; use input() under Python 3.
raw_input()