def model_average(X, penalization):
    """Run ModelAverage in default mode (QuicGraphLassoCV) to obtain
    proportion matrix.

    NOTE: This returns precision_ proportions, not cov, prec estimates,
    so we return the raw proportions for "cov" and the threshold support
    estimate for prec.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to fit the ensemble on.
    penalization : str
        Penalization mode forwarded to ModelAverage (e.g. 'random',
        'fully-random', 'subsampling').

    Returns
    -------
    (proportion_, support_, lam_) of the fitted ModelAverage model.
    """
    n_trials = 100
    # Use print() calls (valid in both Python 2 and 3) for consistency
    # with the Python-3 variants of these helpers elsewhere in the file.
    print('ModelAverage with:')
    print(' estimator: QuicGraphLasso (default)')
    print(' n_trials: {}'.format(n_trials))
    print(' penalization: {}'.format(penalization))

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around. lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == 'random':
        cv_model = QuicGraphLassoCV(cv=2,
                                    n_refinements=6,
                                    n_jobs=1,
                                    init_method='cov',
                                    score_metric=metric)
        cv_model.fit(X)
        lam = cv_model.lam_
        print(' lam: {}'.format(lam))

    model = ModelAverage(n_trials=n_trials,
                         penalization=penalization,
                         lam=lam,
                         n_jobs=1)
    model.fit(X)
    print(' lam_: {}'.format(model.lam_))
    return model.proportion_, model.support_, model.lam_
def test_integration_quic_graph_lasso_cv(self, params_in):
    """Smoke test: checks shapes/lengths of fitted attributes only,
    not statistical validity of the result."""
    n_features = 10
    n_samples = 10
    cov, prec, adj = ClusterGraph(
        n_blocks=1,
        chain_blocks=False,
        seed=1,
    ).create(n_features, 0.8)
    rng = np.random.RandomState(2)
    X = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

    ma = ModelAverage(**params_in)
    ma.fit(X)
    n_examples, n_features = X.shape

    assert ma.proportion_.shape == (n_features, n_features)
    assert len(ma.estimators_) == ma.n_trials
    assert len(ma.subsets_) == ma.n_trials
    # One lambda recorded per trial regardless of mode; under
    # subsampling no lambda is perturbed, so the entries are None.
    assert len(ma.lams_) == ma.n_trials
    if ma.penalization == 'subsampling':
        assert ma.lams_[0] is None

    for idx, est in enumerate(ma.estimators_):
        assert isinstance(est, params_in['estimator'].__class__)

        # sklearn doesnt have this but ours do
        if hasattr(est, 'is_fitted'):
            assert est.is_fitted == True

        # check that all lambdas used where different
        if not ma.penalization == 'subsampling' and idx > 0:
            if hasattr(est, 'lam'):
                previous = ma.estimators_[idx - 1]
                assert np.linalg.norm((previous.lam - est.lam).flat) > 0

    # Proportions are bounded by 1.0 when normalized, by n_trials otherwise.
    upper = 1.0 if ma.normalize == True else ma.n_trials
    assert np.max(ma.proportion_) <= upper
    assert np.min(ma.proportion_) >= 0.0
    assert np.max(ma.proportion_) > 0.0
def test_integration_quic_graph_lasso_cv(self, params_in):
    """Smoke test: checks shapes/lengths of fitted attributes only,
    not statistical validity of the result."""
    X = datasets.load_diabetes().data
    ma = ModelAverage(**params_in)
    ma.fit(X)
    n_examples, n_features = X.shape

    assert ma.proportion_.shape == (n_features, n_features)

    if not ma.use_cache:
        # Without caching, nothing per-trial is retained.
        assert len(ma.estimators_) == 0
        assert len(ma.lams_) == 0
        assert len(ma.subsets_) == 0
    else:
        assert len(ma.estimators_) == ma.n_trials
        assert len(ma.subsets_) == ma.n_trials
        # Subsampling does not perturb lambdas, so none are cached.
        expected_lams = 0 if ma.penalization == 'subsampling' else ma.n_trials
        assert len(ma.lams_) == expected_lams

    for idx, est in enumerate(ma.estimators_):
        assert isinstance(est, params_in['estimator'].__class__)

        # sklearn doesnt have this but ours do
        if hasattr(est, 'is_fitted'):
            assert est.is_fitted == True

        # check that all lambdas used where different
        if not ma.penalization == 'subsampling' and idx > 0:
            if hasattr(est, 'lam'):
                previous = ma.estimators_[idx - 1]
                assert np.linalg.norm((previous.lam - est.lam).flat) > 0

    # Proportions are bounded by 1.0 when normalized, by n_trials otherwise.
    upper = 1.0 if ma.normalize == True else ma.n_trials
    assert np.max(ma.proportion_) <= upper
    assert np.min(ma.proportion_) >= 0.0
    assert np.max(ma.proportion_) > 0.0
def model_average(X, penalization):
    """Run ModelAverage in default mode (QuicGraphicalLassoCV) to obtain
    proportion matrix.

    NOTE: This returns precision_ proportions, not cov, prec estimates,
    so we return the raw proportions for "cov" and the threshold support
    estimate for prec.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to fit the ensemble on.
    penalization : str
        Penalization mode forwarded to ModelAverage.

    Returns
    -------
    (proportion_, support_, lam_) of the fitted ModelAverage model.
    """
    n_trials = 100
    for line in (
        "ModelAverage with:",
        " estimator: QuicGraphicalLasso (default)",
        " n_trials: {}".format(n_trials),
        " penalization: {}".format(penalization),
    ):
        print(line)

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around. lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == "random":
        selector = QuicGraphicalLassoCV(
            cv=2,
            n_refinements=6,
            sc=spark.sparkContext,  # NOQA
            init_method="cov",
            score_metric=metric,
        )
        selector.fit(X)
        lam = selector.lam_
        print(" lam: {}".format(lam))

    averager = ModelAverage(
        n_trials=n_trials,
        penalization=penalization,
        lam=lam,
        sc=spark.sparkContext,  # NOQA
    )
    averager.fit(X)
    print(" lam_: {}".format(averager.lam_))
    return averager.proportion_, averager.support_, averager.lam_
def adaptive_model_average(X, penalization, method):
    """Run ModelAverage in default mode (QuicGraphicalLassoCV) to obtain
    proportion matrix.

    NOTE: Only method = 'binary' really makes sense in this case.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to fit the ensemble on.
    penalization : str
        Penalization mode forwarded to the inner ModelAverage.
    method : str
        Adaptive reweighting method for AdaptiveGraphicalLasso.

    Returns
    -------
    (covariance_, precision_, ||lam_||_2) of the refit estimator.
    """
    n_trials = 100
    for line in (
        "Adaptive ModelAverage with:",
        " estimator: QuicGraphicalLasso (default)",
        " n_trials: {}".format(n_trials),
        " penalization: {}".format(penalization),
        " adaptive-method: {}".format(method),
    ):
        print(line)

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around. lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == "random":
        selector = QuicGraphicalLassoCV(
            cv=2,
            n_refinements=6,
            sc=spark.sparkContext,  # NOQA
            init_method="cov",
            score_metric=metric,
        )
        selector.fit(X)
        lam = selector.lam_
        print(" lam: {}".format(lam))

    averager = ModelAverage(
        n_trials=n_trials,
        penalization=penalization,
        lam=lam,
        sc=spark.sparkContext,  # NOQA
    )
    adaptive = AdaptiveGraphicalLasso(
        estimator=averager,
        method=method,
    )
    adaptive.fit(X)
    lam_norm_ = np.linalg.norm(adaptive.estimator_.lam_)
    print(" ||lam_||_2: {}".format(lam_norm_))
    return adaptive.estimator_.covariance_, adaptive.estimator_.precision_, lam_norm_
def adaptive_model_average(X, penalization, method):
    """Run ModelAverage in default mode (QuicGraphLassoCV) to obtain
    proportion matrix.

    NOTE: Only method = 'binary' really makes sense in this case.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to fit the ensemble on.
    penalization : str
        Penalization mode forwarded to the inner ModelAverage.
    method : str
        Adaptive reweighting method for AdaptiveGraphLasso.

    Returns
    -------
    (covariance_, precision_, ||lam_||_2) of the refit estimator.
    """
    n_trials = 100
    # Use print() calls (valid in both Python 2 and 3) for consistency
    # with the Python-3 variants of these helpers elsewhere in the file.
    print('Adaptive ModelAverage with:')
    print(' estimator: QuicGraphLasso (default)')
    print(' n_trials: {}'.format(n_trials))
    print(' penalization: {}'.format(penalization))
    print(' adaptive-method: {}'.format(method))

    # if penalization is random, first find a decent scalar lam_ to build
    # random perturbation matrix around. lam doesn't matter for fully-random.
    lam = 0.5
    if penalization == 'random':
        cv_model = QuicGraphLassoCV(cv=2,
                                    n_refinements=6,
                                    n_jobs=1,
                                    init_method='cov',
                                    score_metric=metric)
        cv_model.fit(X)
        lam = cv_model.lam_
        print(' lam: {}'.format(lam))

    model = AdaptiveGraphLasso(
        estimator=ModelAverage(n_trials=n_trials,
                               penalization=penalization,
                               lam=lam,
                               n_jobs=1),
        method=method,
    )
    model.fit(X)
    lam_norm_ = np.linalg.norm(model.estimator_.lam_)
    print(' ||lam_||_2: {}'.format(lam_norm_))
    return model.estimator_.covariance_, model.estimator_.precision_, lam_norm_
verbose=verbose, ) ae.fit() ae.show() plt.suptitle('QuicGraphLassoEBIC (BIC)') # average plots for QuicGraphLassoCV ae = AverageError( model_selection_estimator=QuicGraphLassoCV(), n_features=n_features, n_trials=n_trials, verbose=verbose, ) ae.fit() ae.show() plt.suptitle('QuicGraphLassoCV') # average plots for ModelAverage + CV ae = AverageError( model_selection_estimator=ModelAverage(n_trials=20, penalization='random', lam=0.2), n_features=n_features, n_trials=n_trials, verbose=verbose, ) ae.fit() ae.show() plt.suptitle('ModelAverage CV') raw_input()