def test_variable_selection():
    """Check that UoI_Lasso recovers the support of the true model.

    The estimator is built with the MPI world communicator, fit on a
    synthetic regression problem, and the indices of its nonzero
    coefficients are compared with those of the generating coefficients.
    """
    features, response, true_weights = make_regression(coef=True,
                                                       random_state=1)

    model = UoI_Lasso(comm=MPI.COMM_WORLD)
    model.fit(features, response)

    expected_support = np.nonzero(true_weights)[0]
    recovered_support = np.nonzero(model.coef_)[0]

    # Both checks compare the index arrays of the selected features.
    assert_array_equal(expected_support, recovered_support)
    assert_array_almost_equal_nulp(expected_support, recovered_support)
def test_variable_selection():
    """Check that a default UoI_Lasso recovers the support of the true model.

    A synthetic regression problem is generated together with its true
    coefficients; after fitting, the indices of the nonzero fitted
    coefficients must match the indices of the nonzero true coefficients.
    """
    from sklearn.datasets import make_regression

    features, response, true_weights = make_regression(coef=True,
                                                       random_state=1)

    model = UoI_Lasso()
    model.fit(features, response)

    expected_support = np.nonzero(true_weights)[0]
    recovered_support = np.nonzero(model.coef_)[0]

    # Both checks compare the index arrays of the selected features.
    assert_array_equal(expected_support, recovered_support)
    assert_array_almost_equal_nulp(expected_support, recovered_support)
def test_intercept():
    """Test that UoI Lasso properly calculates the intercept.

    With ``fit_intercept=True`` the fitted intercept is checked against
    ``mean(y) - dot(mean(X, axis=0), coef_)``, i.e. the value implied by
    centering the response and the features.
    """
    X = np.array([[-1, 2], [0, 1], [1, 3], [4, 3]])
    y = np.array([8, 5, 14, 17])

    lasso = UoI_Lasso(normalize=False, fit_intercept=True)
    lasso.fit(X, y)

    expected_intercept = np.mean(y) - np.dot(X.mean(axis=0), lasso.coef_)

    # Compare with a tolerance: exact `==` on floating-point results is
    # brittle because mathematically equal expressions may differ in the
    # last bits depending on summation order or the BLAS in use.
    assert np.allclose(lasso.intercept_, expected_intercept)
def test_estimation_score_usage():
    """Check that the estimation score of UoI Lasso can be switched.

    Each score name is passed to the constructor, verified on the
    estimator, and used for a fit. The best score obtained under each
    criterion is collected, and all of them must be distinct.
    """
    score_names = ('r2', 'AIC', 'AICc', 'BIC')
    X, y = make_regression(n_features=10, n_informative=3, random_state=10)

    best_scores = []
    for score_name in score_names:
        model = UoI_Lasso(estimation_score=score_name)
        assert_equal(model.estimation_score, score_name)

        model.fit(X, y)
        best_scores.append(np.max(model.scores_))

    # One distinct best score per criterion.
    n_distinct = len(np.unique(best_scores))
    assert_equal(n_distinct, len(score_names))
def test_uoi_lasso_toy():
    """Fit UoI Lasso on a tiny noiseless problem and recover its weights."""
    design = np.array([[-1, 2],
                       [4, 1],
                       [1, 3],
                       [4, 3],
                       [8, 11]], dtype=float)
    true_coef = np.array([1, 4], dtype=float)
    target = np.dot(design, true_coef)

    # The selection/estimation fractions are chosen slightly smaller so
    # that good test sets are obtained from the five samples.
    frac = 0.75
    model = UoI_Lasso(fit_intercept=False,
                      selection_frac=frac,
                      estimation_frac=frac)
    model.fit(design, target)

    assert_allclose(model.coef_, true_coef)
def test_uoi_lasso_toy():
    """Fit UoI Lasso on a tiny noiseless problem with each solver.

    The ``'cd'`` solver is always exercised (including a second fit with
    ``verbose=True``); the ``'pyc'`` solver is exercised only when the
    module-level ``pycasso`` name is not ``None``.
    """
    design = np.array([[-1, 2],
                       [4, 1],
                       [1, 3],
                       [4, 3],
                       [8, 11]], dtype=float)
    true_coef = np.array([1, 4], dtype=float)
    target = np.dot(design, true_coef)

    # The selection/estimation fractions are chosen slightly smaller so
    # that good test sets are obtained from the five samples.
    shared_kwargs = dict(fit_intercept=False,
                         selection_frac=0.75,
                         estimation_frac=0.75)

    cd_model = UoI_Lasso(solver='cd', **shared_kwargs)
    cd_model.fit(design, target)
    cd_model.fit(design, target, verbose=True)
    assert_allclose(cd_model.coef_, true_coef)

    if pycasso is None:
        return

    pyc_model = UoI_Lasso(solver='pyc', **shared_kwargs)
    pyc_model.fit(design, target)
    assert_allclose(pyc_model.coef_, true_coef)
def test_intercept_and_coefs_no_selection():
    """Check intercept and coefficient recovery with and without
    standardization.

    A two-feature linear model with an intercept is generated, and the
    fitted intercept and coefficients must be within 25% relative
    tolerance of the generating values for both ``standardize`` settings.
    """
    X, y, beta, intercept = make_linear_regression(n_samples=500,
                                                   n_features=2,
                                                   n_informative=2,
                                                   snr=10.,
                                                   include_intercept=True,
                                                   random_state=2332)

    # First without standardization, then with it.
    for use_standardize in (False, True):
        model = UoI_Lasso(standardize=use_standardize, fit_intercept=True)
        model.fit(X, y)
        assert_allclose(model.intercept_, intercept, rtol=0.25)
        assert_allclose(model.coef_, beta, rtol=0.25)
def test_set_random_state():
    """Check how the ``random_state`` argument affects fitted coefficients.

    Equal seeds must give identical coefficients; different seeds, and
    two estimators built without a seed, must give different ones.
    """
    X, y = make_regression(n_features=5,
                           n_informative=3,
                           random_state=16,
                           noise=0.5)

    # Identical seeds -> identical coefficients.
    seeded_a = UoI_Lasso(random_state=13)
    seeded_b = UoI_Lasso(random_state=13)
    seeded_a.fit(X, y)
    seeded_b.fit(X, y)
    assert_array_equal(seeded_a.coef_, seeded_b.coef_)

    # A different seed -> different coefficients.
    reseeded = UoI_Lasso(random_state=14)
    reseeded.fit(X, y)
    assert not np.array_equal(seeded_a.coef_, reseeded.coef_)

    # No seed given to either estimator -> different coefficients.
    unseeded_a = UoI_Lasso()
    unseeded_b = UoI_Lasso()
    unseeded_a.fit(X, y)
    unseeded_b.fit(X, y)
    assert not np.array_equal(unseeded_a.coef_, unseeded_b.coef_)
def test_uoi_lasso_estimation_shape_match():
    """Check that ``_score_predictions`` raises on mismatched shapes.

    Two failure modes are exercised on a fitted estimator: coefficients
    overwritten with a 2-D array, and a 3-D target array.
    """
    n_samples = 40
    n_features = 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)
    lasso = UoI_Lasso()
    lasso.fit(X, y)

    # Case 1: 2-D coefficients make the predictions disagree with y.
    shape_mismatch_msg = ('Targets and predictions are '
                          'not the same shape.')
    support = np.ones(n_features).astype(bool)
    boot_idxs = [np.arange(n_samples)] * 2
    lasso.coef_ = np.random.randn(2, n_features)
    with pytest.raises(ValueError, match=shape_mismatch_msg):
        lasso._score_predictions('r2', lasso, X, y, support, boot_idxs)

    # Case 2: a target with two extra trailing axes is rejected.
    support = np.ones(n_features).astype(bool)
    boot_idxs = [np.arange(n_samples)] * 2
    y_3d = y[:, np.newaxis, np.newaxis]
    with pytest.raises(ValueError, match='y should either have'):
        lasso._score_predictions('r2', lasso, X, y_3d, support, boot_idxs)
def test_uoi_lasso_fit_shape_match():
    """Check which target shapes ``UoI_Lasso.fit`` accepts.

    A column-vector target must be accepted, while a two-column target
    and a 3-D target must raise ``ValueError``.
    """
    n_samples, n_features = 40, 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)
    lasso = UoI_Lasso()

    # A singleton second axis is accepted (the fit must not raise).
    lasso.fit(X, y[:, np.newaxis])

    # Any other multi-dimensional target must be rejected.
    expected_error = 'y should either have shape'
    bad_targets = (
        np.tile(y[:, np.newaxis], (1, 2)),
        y[:, np.newaxis, np.newaxis],
    )
    for bad_y in bad_targets:
        with pytest.raises(ValueError, match=expected_error):
            lasso.fit(X, bad_y)
def __call__(self, task_tuple):
    """Run one simulated regression task and record its result.

    Parameters
    ----------
    task_tuple : sequence
        Indexable whose first three entries are
        ``(cov_param_idx, rep, algorithm)``:

        * ``cov_param_idx`` -- index into ``self.cov_params``.
        * ``rep`` -- repetition label; only stored in the task signature.
        * ``algorithm`` -- estimator code: ``0`` LassoCV, ``1`` UoI_Lasso
          (``'r2'`` estimation score), ``2`` PycassoCV with the SCAD
          penalty, ``3`` PycassoCV with the MCP penalty.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of 0, 1, 2, 3. This is raised before
        any data is generated or any result is stored, so ``self.beta``,
        ``self.beta_hat`` and ``self.task_signature`` stay in step.

    Notes
    -----
    Appends to ``self.beta``, ``self.beta_hat`` and
    ``self.task_signature`` and prints the elapsed time.
    """
    cov_param_idx = task_tuple[0]
    rep = task_tuple[1]
    algorithm = task_tuple[2]

    # Fail fast: without this check an unknown code would only surface
    # as an UnboundLocalError on `beta_hat` after the expensive fit
    # setup, and after `self.beta` had already been appended to.
    if algorithm not in (0, 1, 2, 3):
        raise ValueError('Unknown algorithm code %r; expected 0, 1, 2 or 3.'
                         % (algorithm,))

    n_features = self.n_features
    n_samples = self.n_samples

    # Generate the dense coefficients with a fixed seed up front, then
    # sparsify per covariance setting (seeded by the block size).
    # NOTE(review): the original author asked whether this procedure is
    # reproducible subsequently -- still to be confirmed.
    beta = gen_beta2(n_features, n_features, sparsity=1, betawidth=-1,
                     seed=1234)
    cov_param = self.cov_params[cov_param_idx]
    sigma = gen_covariance(n_features,
                           cov_param['correlation'],
                           cov_param['block_size'],
                           cov_param['L'],
                           cov_param['t'])
    beta_ = sparsify_beta(beta, cov_param['block_size'], sparsity=0.25,
                          seed=cov_param['block_size'])

    t0 = time.time()

    # Only the first and third returned values are used here; the other
    # three are discarded.
    X, _X_test, y, _y_test, _ss = gen_data(n_samples, n_features, kappa=5,
                                           covariance=sigma, beta=beta_)

    # Standardize the features and center the response (in place).
    X = StandardScaler().fit_transform(X)
    y -= np.mean(y)

    if algorithm == 0:
        lasso = LassoCV(fit_intercept=False, cv=5)
        lasso.fit(X, y.ravel())
        beta_hat = lasso.coef_
    elif algorithm == 1:
        uoi = UoI_Lasso(fit_intercept=False, estimation_score='r2')
        uoi.fit(X, y)
        beta_hat = uoi.coef_
    elif algorithm == 2:
        scad = PycassoCV(penalty='scad', fit_intercept=False, nfolds=5,
                         n_alphas=100)
        scad.fit(X, y)
        beta_hat = scad.coef_
    else:  # algorithm == 3, guaranteed by the validation above
        mcp = PycassoCV(penalty='mcp', fit_intercept=False, nfolds=5,
                        n_alphas=100)
        mcp.fit(X, y)
        beta_hat = mcp.coef_

    self.beta.append(beta_)
    self.beta_hat.append(beta_hat)
    self.task_signature.append((cov_param_idx, rep, algorithm))
    print('call successful, algorithm %d took %f seconds'
          % (algorithm, time.time() - t0))