def test_with_stratified_bootstrap():
    """Stratified bootstrapping should still recover the informative features."""
    n_samples, _n_features, n_informative = 1000, 1000, 5

    X, y, important_betas = _generate_dummy_classification_data(
        n=n_samples, k=n_informative)

    selector = StabilitySelection(
        lambda_grid=np.logspace(-5, -1, 25),
        verbose=1,
        bootstrap_func='stratified',
    )
    selector.fit(X, y)

    # Exactly the informative coefficients should be selected.
    assert_almost_equal(important_betas, selector.get_support(indices=True))
def test_issparse():
    """Fitting on a scipy CSR matrix must work just like on a dense array."""
    n_samples, n_features = 200, 200
    correlation = 0.6
    weakness = 0.2

    X, y = generate_experiment_data(n_samples, n_features, correlation)

    selector = StabilitySelection(
        base_estimator=RandomizedLasso(weakness=weakness),
        lambda_name='alpha',
        lambda_grid=np.linspace(0.01, 0.5, num=100),
        threshold=0.9,
        verbose=1,
    )
    # Smoke test: no exception when the design matrix is sparse.
    selector.fit(csr_matrix(X), y)
def test_stability_plot():
    """Smoke test: plotting the stability path of a fitted selector must not raise."""
    n_samples, _n_features, n_informative = 500, 200, 5

    X, y, _important_betas = _generate_dummy_regression_data(
        n=n_samples, k=n_informative)

    pipeline = Pipeline([('scaler', StandardScaler()),
                         ('model', Lasso())])

    selector = StabilitySelection(base_estimator=pipeline,
                                  lambda_name='model__alpha',
                                  lambda_grid=np.logspace(-1, 1, num=10))
    selector.fit(X, y)

    plot_stability_path(selector, threshold_highlight=0.5)
def test_no_features():
    """With zero informative features, transform() returns an (n, 0) array."""
    n_samples, _n_features, n_informative = 100, 200, 0

    X, y, _important_betas = _generate_dummy_regression_data(
        n=n_samples, k=n_informative)

    pipeline = Pipeline([('scaler', StandardScaler()),
                         ('model', Lasso())])

    selector = StabilitySelection(base_estimator=pipeline,
                                  lambda_name='model__alpha',
                                  lambda_grid=np.logspace(-1, 1, num=10))
    selector.fit(X, y)

    # Nothing is selected, so the reduced matrix has zero columns.
    expected = np.empty(0).reshape((X.shape[0], 0))
    assert_almost_equal(selector.transform(X), expected)
 def stability_selection(self, X, y):
     """
     Wrapper around the stability-selection package, which implements the
     stability selection feature-selection algorithm [1]. Bootstrapping can
     be performed using complementary pairs subsampling [2].
     https://github.com/scikit-learn-contrib/stability-selection

     Selects features by fitting a ``StabilitySelection`` estimator on
     ``(X, y)`` and stores the indices of the accepted features on
     ``self.accepted_features_index``.

     [1]: Meinshausen, N. and Buhlmann, P., 2010. Stability selection. Journal of the Royal Statistical Society:
     Series B (Statistical Methodology), 72(4), pp.417-473.
     [2] Shah, R.D. and Samworth, R.J., 2013. Variable selection with error control: another look at stability
     selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 75(1), pp.55-80.
     """
     # Constructor defaults; each entry can be overridden by a same-named
     # key in self.fit_params.
     defaults = {
         'base_estimator': LogisticRegression(penalty='l1',
                                              solver='liblinear'),
         'lambda_name': 'C',
         'lambda_grid': np.logspace(-5, -2, 25),
         'n_bootstrap_iterations': self.n_bsamples,
         'sample_fraction': 0.5,
         'threshold': 0.6,
         'bootstrap_func': 'subsample',
         'bootstrap_threshold': None,
         'verbose': 0,
         'n_jobs': 1,
         'pre_dispatch': '2*n_jobs',
         'random_state': self.random_state,
     }
     init_params = {name: self.fit_params.get(name, default)
                    for name, default in defaults.items()}
     # NOTE(review): the original code also built a copy of self.fit_params
     # with the constructor keys removed, but never used it (dead code,
     # removed here). If leftover fit params were meant to be forwarded to
     # fit(), that still needs to be wired up -- confirm intent.
     feature_selector = StabilitySelection(**init_params)
     feature_selector.fit(X, y)
     # Indices of the features whose stability score passes the threshold.
     self.accepted_features_index = feature_selector.get_support(
         indices=True)
def test_stability_selection_regression():
    """Regression setting: the informative coefficients must be recovered."""
    n_samples, _n_features, n_informative = 500, 1000, 5

    X, y, important_betas = _generate_dummy_regression_data(
        n=n_samples, k=n_informative)

    pipeline = Pipeline([('scaler', StandardScaler()),
                         ('model', Lasso())])

    selector = StabilitySelection(base_estimator=pipeline,
                                  lambda_name='model__alpha',
                                  lambda_grid=np.logspace(-1, 1, num=10))
    selector.fit(X, y)

    # The support should match the truly informative features.
    assert_almost_equal(important_betas, selector.get_support(indices=True))
def test_randomized_lasso():
    """Randomized lasso on correlated data should select the two true features."""
    n_samples, n_features = 200, 200
    correlation = 0.6
    weakness = 0.2

    X, y = generate_experiment_data(n_samples, n_features, correlation)

    selector = StabilitySelection(
        base_estimator=RandomizedLasso(weakness=weakness),
        lambda_name='alpha',
        lambda_grid=np.linspace(0.01, 0.5, num=100),
        threshold=0.9,
        verbose=1,
    )
    selector.fit(X, y)

    # Features 0 and 1 carry the signal in this synthetic setup.
    assert_almost_equal(np.array([0, 1]), selector.get_support(indices=True))
def test_different_shape():
    """transform() with a different feature count than fit() should be rejected."""
    n_samples, _n_features, n_informative = 100, 200, 5

    X, y, _important_betas = _generate_dummy_regression_data(
        n=n_samples, k=n_informative)

    pipeline = Pipeline([('scaler', StandardScaler()),
                         ('model', Lasso())])

    selector = StabilitySelection(base_estimator=pipeline,
                                  lambda_name='model__alpha',
                                  lambda_grid=np.logspace(-1, 1, num=10))
    selector.fit(X, y)
    # Drop two columns so the shape no longer matches the fitted data.
    selector.transform(X[:, :-2])
def test_stability_selection_classification():
    """Classification with default estimator: support, transform and scores."""
    n_samples, n_features, n_informative = 1000, 1000, 5

    X, y, important_betas = _generate_dummy_classification_data(
        n=n_samples, k=n_informative)
    selector = StabilitySelection(lambda_grid=np.logspace(-5, -1, 25),
                                  verbose=1)
    selector.fit(X, y)

    chosen_betas = selector.get_support(indices=True)
    X_reduced = selector.transform(X)

    assert_almost_equal(important_betas, chosen_betas)
    # transform keeps only the k informative columns.
    assert X_reduced.shape == (n_samples, n_informative)
    # One stability score per feature and per lambda value.
    assert selector.stability_scores_.shape == (
        n_features, selector.lambda_grid.shape[0])
Exemple #10
0
#Data transforming
# One-hot encode every categorical column, dropping the first dummy level to
# avoid perfect collinearity. Assumes `binary_data` (a DataFrame with an
# 'outcome' column) and `all_cate_var` (list of categorical column names) are
# defined earlier in the file -- TODO confirm.
for col in all_cate_var:
    dummy = pd.get_dummies(binary_data[col],prefix = col)
    dummy.drop(dummy.columns[0],axis = 1,inplace = True)
    # Replace the original categorical column with its dummy columns in place.
    binary_data.drop(col,axis = 1,inplace = True)
    binary_data = pd.concat([binary_data, dummy], axis = 1)

# Feature matrix / integer target as plain numpy arrays.
train_x = np.array(binary_data.drop(columns=['outcome']))
train_y = np.array(binary_data['outcome']).astype('int')

#Feature Selection
from stability_selection import StabilitySelection
from sklearn.svm import LinearSVC

# Stability selection with a linear SVC as base estimator.
lsvc = LinearSVC()
selector = StabilitySelection(base_estimator = lsvc).fit(train_x, train_y)
train_x_name = binary_data.drop(columns=['outcome']).columns
# Average each feature's stability score over the lambda grid, then keep the
# 30 highest-scoring features for inspection.
score_table = pd.DataFrame(np.mean(selector.stability_scores_,axis = 1),train_x_name,columns = ['Scores'])
score_table = score_table.sort_values(['Scores'],ascending=False)[0:30]

# Columns actually retained by the selector's support mask.
train_x_varselect = binary_data[train_x_name[selector.get_support()]]

#Modeling
# Fit a logistic regression on the selected features only.
from sklearn.linear_model import LogisticRegression
Logistic_model = LogisticRegression()
Logistic_model.fit(train_x_varselect, train_y)

#CV
# Mean cross-validated score of the fitted model (default CV settings).
from sklearn.model_selection import cross_val_score
Logistic_scores = np.mean(cross_val_score(Logistic_model, train_x_varselect, train_y))
Exemple #11
0
def test_check_string_threshold():
    """A non-numeric threshold must be rejected during input validation."""
    selector = StabilitySelection(threshold='wrong_value')
    selector._validate_input()
Exemple #12
0
def test_check_threshold_too_large():
    """A threshold above 1 must be rejected during input validation."""
    selector = StabilitySelection(threshold=1.5)
    selector._validate_input()
Exemple #13
0
	from stability_selection import StabilitySelection

    if model == 'LogisticRegression':
    	clf = LogisticRegression(penalty='l1', class_weight='balanced', solver='auto',random_state= )

    if model == 'LogisticRegression':
    	clf = RidgeClassifier(alpha=1.0, class_weight='balanced', solver='auto', random_state=None)

	self.model = clf

	base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])

    selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C',
                                  lambda_grid=np.logspace(-5, -1, 50))
    selector.fit(X, y)

    fig, ax = plot_stability_path(selector)
    fig.show()

    selected_variables = selector.get_support(indices=True)
    selected_scores = selector.stability_scores_.max(axis=1)

    print('Selected variables are:')
    print('-----------------------')

    for idx, (variable, score) in enumerate(zip(selected_variables, selected_scores[selected_variables])):
        print('Variable %d: [%d], score %.3f' % (idx + 1, variable, score))

    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n, ))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n, ))

    y = np.matmul(X, beta) + epsilon

    return X, y


if __name__ == '__main__':
    # NOTE(review): n, p and rho are set here but generate_experiment_data()
    # is called without arguments, so its own defaults are used -- confirm.
    n, p = 200, 200
    rho = 0.6

    X, y = generate_experiment_data()
    lambda_grid = np.linspace(0.001, 0.5, num=100)

    # `weakness` controls how strongly the randomized penalty perturbs each
    # coefficient; sweep a few settings and plot each stability path.
    for weakness in (0.2, 0.5, 1.0):
        selector = StabilitySelection(
            base_estimator=RandomizedLasso(weakness=weakness),
            lambda_name='alpha',
            lambda_grid=lambda_grid,
            threshold=0.9,
            verbose=1,
        )
        selector.fit(X, y)

        fig, ax = plot_stability_path(selector)
        fig.show()
Exemple #15
0
def test_check_arguments():
    """Validation must reject a non-numeric threshold argument."""
    selector = StabilitySelection(threshold='wrong_value')
    selector._validate_input()
Exemple #16
0
def test_lambda_name():
    """A lambda_name the base estimator does not expose must fail validation."""
    selector = StabilitySelection(lambda_name='n_estimators')
    selector._validate_input()
Exemple #17
0
def test_sample_fraction():
    """A zero sample fraction must be rejected during input validation."""
    selector = StabilitySelection(sample_fraction=0.0)
    selector._validate_input()
Exemple #18
0
def test_callable_bootstrap_func():
    """bootstrap_func must be a known name or a callable, not a float."""
    selector = StabilitySelection(bootstrap_func=0.5)
    selector._validate_input()
Exemple #19
0
def test_check_threshold_too_small():
    """A zero threshold must be rejected during input validation."""
    selector = StabilitySelection(threshold=0.0)
    selector._validate_input()
Exemple #20
0
def test_automatic_lambda_grid():
    """Without an explicit grid, validation fills in logspace(-5, -2, 25)."""
    selector = StabilitySelection()
    selector._validate_input()
    assert_array_equal(selector.lambda_grid, np.logspace(-5, -2, 25))
Exemple #21
0
def test_check_wrong_n_bootstrap_iterations():
    """A negative bootstrap iteration count must fail input validation.

    Renamed: the original was called test_check_wrong_lambda_name even though
    it validates n_bootstrap_iterations, and it duplicated the name of the
    real lambda-name test in this file (one def silently shadowed the other).
    """
    StabilitySelection(n_bootstrap_iterations=-1)._validate_input()
Exemple #22
0
def test_check_wrong_lambda_name():
    """A lambda_name unknown to the default estimator must fail validation."""
    selector = StabilitySelection(lambda_name='alpha')
    selector._validate_input()
Exemple #23
0
def test_transformer():
    """Run scikit-learn's estimator-contract checks on the selector."""
    # With defaults this can fail because in the low sample size case some
    # bootstrap samples can contain zero positive-class cases, hence the
    # reduced iteration count and full sample fraction.
    selector = StabilitySelection(n_bootstrap_iterations=10,
                                  sample_fraction=1.0)
    return check_estimator(selector)
Exemple #24
0
def test_bootstrap_func():
    """An unknown bootstrap_func name must be rejected during validation."""
    selector = StabilitySelection(bootstrap_func='nonexistent')
    selector._validate_input()
Exemple #25
0
def test_get_support_string_threshold():
    """get_support must reject a non-numeric threshold override.

    Renamed: the original was called test_check_threshold_too_small even
    though it passes a string threshold to get_support, and it duplicated
    the name of the real too-small-threshold test in this file (one def
    silently shadowed the other).
    """
    StabilitySelection().get_support(threshold='wrong_value')