예제 #1
0
def test_variable_selection():
    """Check that UoI_Lasso selects the same features that generated the data.

    A synthetic regression problem is drawn with ``make_regression`` and the
    estimator is fit using the MPI world communicator. The indices of the
    nonzero fitted coefficients must match the indices of the nonzero
    generating coefficients.
    """
    features, response, generating_coef = make_regression(coef=True,
                                                          random_state=1)

    model = UoI_Lasso(comm=MPI.COMM_WORLD)
    model.fit(features, response)

    # Only the supports (index sets) are compared, not coefficient values.
    expected_support = np.nonzero(generating_coef)[0]
    recovered_support = np.nonzero(model.coef_)[0]
    assert_array_equal(expected_support, recovered_support)
    assert_array_almost_equal_nulp(expected_support, recovered_support)
예제 #2
0
def test_variable_selection():
    """Check that a default UoI_Lasso recovers the true set of features.

    The nonzero positions of the fitted coefficient vector are compared with
    the nonzero positions of the coefficients used by ``make_regression`` to
    generate the data.
    """
    from sklearn.datasets import make_regression

    data, target, ground_truth = make_regression(coef=True, random_state=1)

    estimator = UoI_Lasso()
    estimator.fit(data, target)

    # np.nonzero returns a tuple of index arrays; take the first axis.
    selected = np.nonzero(estimator.coef_)[0]
    informative = np.nonzero(ground_truth)[0]
    assert_array_equal(informative, selected)
    assert_array_almost_equal_nulp(informative, selected)
예제 #3
0
def test_intercept():
    """Test that UoI Lasso properly calculates the intercept when centering
    the response variable.

    The fitted intercept must equal ``mean(y) - mean(X, axis=0) . coef_``,
    i.e. the offset implied by the fitted coefficients and the column means.
    """

    X = np.array([[-1, 2], [0, 1], [1, 3], [4, 3]])
    y = np.array([8, 5, 14, 17])

    lasso = UoI_Lasso(normalize=False, fit_intercept=True)
    lasso.fit(X, y)

    expected_intercept = np.mean(y) - np.dot(X.mean(axis=0), lasso.coef_)
    # Compare with a tolerance: exact floating-point equality would make the
    # test depend on the order of arithmetic operations inside the estimator.
    assert np.allclose(lasso.intercept_, expected_intercept)
예제 #4
0
def test_estimation_score_usage():
    """Check that each supported estimation score can be selected and used.

    For every criterion the estimator must store the requested name, and the
    best score obtained after fitting must differ from one criterion to the
    next.
    """
    X, y = make_regression(n_features=10, n_informative=3, random_state=10)
    criteria = ('r2', 'AIC', 'AICc', 'BIC')

    best_scores = []
    for criterion in criteria:
        model = UoI_Lasso(estimation_score=criterion)
        assert_equal(model.estimation_score, criterion)
        model.fit(X, y)
        best_scores.append(np.max(model.scores_))

    # Every criterion should have produced a distinct best score.
    n_distinct = len(np.unique(best_scores))
    assert_equal(n_distinct, len(criteria))
예제 #5
0
def test_uoi_lasso_toy():
    """Fit UoI Lasso on a tiny noiseless problem and recover the coefficients.

    The response is an exact linear combination of two features, so the
    fitted coefficients are expected to match the generating ones.
    """
    true_beta = np.array([1, 4], dtype=float)
    design = np.array([[-1, 2], [4, 1], [1, 3], [4, 3], [8, 11]], dtype=float)
    response = design.dot(true_beta)

    # Both fractions are chosen slightly smaller so that, with only five
    # samples, the held-out test sets are usable.
    settings = {
        'fit_intercept': False,
        'selection_frac': 0.75,
        'estimation_frac': 0.75,
    }
    model = UoI_Lasso(**settings)
    model.fit(design, response)

    assert_allclose(model.coef_, true_beta)
예제 #6
0
def test_uoi_lasso_toy():
    """Fit UoI Lasso on a tiny noiseless problem with each available solver.

    The 'cd' solver is always exercised (once silently, once with
    ``verbose=True``); the 'pyc' solver is exercised only when ``pycasso``
    is available.
    """
    true_beta = np.array([1, 4], dtype=float)
    design = np.array([[-1, 2], [4, 1], [1, 3], [4, 3], [8, 11]], dtype=float)
    response = design.dot(true_beta)

    # Both fractions are chosen slightly smaller so that, with only five
    # samples, the held-out test sets are usable.
    shared_kwargs = dict(fit_intercept=False,
                         selection_frac=0.75,
                         estimation_frac=0.75)

    cd_model = UoI_Lasso(**shared_kwargs, solver='cd')
    cd_model.fit(design, response)
    # Refit with verbose output enabled to exercise that code path as well.
    cd_model.fit(design, response, verbose=True)
    assert_allclose(cd_model.coef_, true_beta)

    # The pycasso-backed solver is optional; stop here when it is missing.
    if pycasso is None:
        return

    pyc_model = UoI_Lasso(**shared_kwargs, solver='pyc')
    pyc_model.fit(design, response)
    assert_allclose(pyc_model.coef_, true_beta)
예제 #7
0
def test_intercept_and_coefs_no_selection():
    """Check intercept and coefficient recovery with and without
    standardization.

    A two-feature linear model with an intercept is generated, and the fitted
    intercept and coefficients must be within 25% (relative) of the true
    values for both settings of ``standardize``.
    """
    X, y, beta, intercept = make_linear_regression(
        n_samples=500,
        n_features=2,
        n_informative=2,
        snr=10.,
        include_intercept=True,
        random_state=2332)

    # First without standardization, then with it.
    for use_standardization in (False, True):
        model = UoI_Lasso(standardize=use_standardization, fit_intercept=True)
        model.fit(X, y)
        assert_allclose(model.intercept_, intercept, rtol=0.25)
        assert_allclose(model.coef_, beta, rtol=0.25)
예제 #8
0
def test_set_random_state():
    """Check that ``random_state`` controls the reproducibility of a fit.

    Equal seeds must give identical coefficients, while different seeds, or
    no seed at all, must give coefficients that differ.
    """
    X, y = make_regression(n_features=5, n_informative=3,
                           random_state=16, noise=.5)

    # Two estimators sharing a seed must agree exactly.
    seeded_a = UoI_Lasso(random_state=13)
    seeded_b = UoI_Lasso(random_state=13)
    seeded_a.fit(X, y)
    seeded_b.fit(X, y)
    assert_array_equal(seeded_a.coef_, seeded_b.coef_)

    # A different seed must lead to a different solution.
    reseeded = UoI_Lasso(random_state=14)
    reseeded.fit(X, y)
    assert not np.array_equal(seeded_a.coef_, reseeded.coef_)

    # Leaving the seed unset must also give differing solutions.
    unseeded_a = UoI_Lasso()
    unseeded_b = UoI_Lasso()
    unseeded_a.fit(X, y)
    unseeded_b.fit(X, y)
    assert not np.array_equal(unseeded_a.coef_, unseeded_b.coef_)
예제 #9
0
def test_uoi_lasso_estimation_shape_match():
    """Check that ``_score_predictions`` raises ``ValueError`` on bad shapes.

    Two cases are covered: a 2-row coefficient matrix paired with the
    original targets, and a 3-dimensional target array.
    """
    n_samples, n_features = 40, 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)

    lasso = UoI_Lasso()
    lasso.fit(X, y)

    shape_mismatch = 'Targets and predictions are not the same shape.'
    with pytest.raises(ValueError, match=shape_mismatch):
        full_support = np.ones(n_features).astype(bool)
        resamples = [np.arange(n_samples)] * 2
        # Overwrite the fitted coefficients with a 2-row matrix; presumably
        # this makes the predictions 2-D while the targets stay 1-D.
        lasso.coef_ = np.random.randn(2, n_features)
        lasso._score_predictions('r2', lasso, X, y, full_support, resamples)

    with pytest.raises(ValueError, match='y should either have'):
        full_support = np.ones(n_features).astype(bool)
        resamples = [np.arange(n_samples)] * 2
        y_three_dim = y[:, np.newaxis, np.newaxis]
        lasso._score_predictions('r2', lasso, X, y_three_dim,
                                 full_support, resamples)
예제 #10
0
def test_uoi_lasso_fit_shape_match():
    """Check which target shapes ``UoI_Lasso.fit`` accepts and rejects.

    A column-vector target of shape ``(n_samples, 1)`` must be accepted,
    while a two-column target and a 3-dimensional target must raise
    ``ValueError``.
    """
    n_samples, n_features = 40, 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)

    lasso = UoI_Lasso()

    # A trailing singleton axis is accepted (the fit must not raise).
    lasso.fit(X, y[:, np.newaxis])

    # Targets with two columns or with three dimensions are rejected.
    message = 'y should either have shape'
    two_columns = np.tile(y[:, np.newaxis], (1, 2))
    three_dims = y[:, np.newaxis, np.newaxis]
    for bad_target in (two_columns, three_dims):
        with pytest.raises(ValueError, match=message):
            lasso.fit(X, bad_target)
예제 #11
0
    def __call__(self, task_tuple):
        """Run one simulated regression task and record its outcome.

        Parameters
        ----------
        task_tuple : sequence
            ``(cov_param_idx, rep, algorithm)``. ``cov_param_idx`` indexes
            ``self.cov_params``; ``rep`` is only stored in the recorded task
            signature; ``algorithm`` selects the estimator:

            * 0 -- ``LassoCV`` with 5-fold cross-validation
            * 1 -- ``UoI_Lasso`` with the ``'r2'`` estimation score
            * 2 -- ``PycassoCV`` with the ``'scad'`` penalty
            * 3 -- ``PycassoCV`` with the ``'mcp'`` penalty

        Raises
        ------
        ValueError
            If ``algorithm`` is not one of 0, 1, 2 or 3.

        Notes
        -----
        Nothing is returned. The sparsified coefficients, the fitted
        coefficients and ``(cov_param_idx, rep, algorithm)`` are appended to
        ``self.beta``, ``self.beta_hat`` and ``self.task_signature``, and a
        timing message is printed.
        """
        cov_param_idx, rep, algorithm = task_tuple[:3]

        # Reject unknown algorithm codes before doing any work; otherwise
        # ``beta_hat`` would be unbound after the data had been generated.
        if algorithm not in (0, 1, 2, 3):
            raise ValueError('Unknown algorithm %r; expected one of '
                             '0, 1, 2, 3.' % (algorithm,))

        n_features = self.n_features
        n_samples = self.n_samples

        # Dense coefficients are always generated with the same fixed seed.
        beta = gen_beta2(n_features,
                         n_features,
                         sparsity=1,
                         betawidth=-1,
                         seed=1234)

        cov_param = self.cov_params[cov_param_idx]

        sigma = gen_covariance(n_features, cov_param['correlation'],
                               cov_param['block_size'], cov_param['L'],
                               cov_param['t'])

        # Sparsify the fixed-seed coefficients, seeding with the block size.
        # NOTE(review): the original author asked whether generating beta
        # once and sparsifying afterwards is reproducible -- still unverified.
        beta_ = sparsify_beta(beta,
                              cov_param['block_size'],
                              sparsity=0.25,
                              seed=cov_param['block_size'])

        t0 = time.time()
        # Only the first and third outputs of gen_data are used here.
        X, _, y, _, _ = gen_data(n_samples,
                                 n_features,
                                 kappa=5,
                                 covariance=sigma,
                                 beta=beta_)

        # Standardize the features and center the response, since every
        # estimator below is fit without an intercept.
        X = StandardScaler().fit_transform(X)
        y -= np.mean(y)

        if algorithm == 0:
            lasso = LassoCV(fit_intercept=False, cv=5)
            # LassoCV is given a flattened response.
            lasso.fit(X, y.ravel())
            beta_hat = lasso.coef_

        elif algorithm == 1:
            uoi = UoI_Lasso(fit_intercept=False, estimation_score='r2')
            uoi.fit(X, y)
            beta_hat = uoi.coef_

        elif algorithm == 2:
            scad = PycassoCV(penalty='scad',
                             fit_intercept=False,
                             nfolds=5,
                             n_alphas=100)
            scad.fit(X, y)
            beta_hat = scad.coef_

        else:  # algorithm == 3, guaranteed by the validation above
            mcp = PycassoCV(penalty='mcp',
                            fit_intercept=False,
                            nfolds=5,
                            n_alphas=100)
            mcp.fit(X, y)
            beta_hat = mcp.coef_

        self.beta.append(beta_)
        self.beta_hat.append(beta_hat)
        self.task_signature.append((cov_param_idx, rep, algorithm))
        print('call successful, algorithm %d took %f seconds' %
              (algorithm, time.time() - t0))