예제 #1
0
def test_set_random_state():
    """Check that `random_state` governs the reproducibility of a fit.

    Models built with the same seed must produce equal coefficients, while
    models built with different seeds, or with no seed at all, must not.
    """
    X, y, _, _ = make_classification(n_samples=100,
                                     random_state=60,
                                     n_informative=4,
                                     n_features=5,
                                     w_scale=4.)

    # Identical seeds: both fits must agree exactly.
    seeded_a, seeded_b = (UoI_L1Logistic(random_state=13) for _ in range(2))
    for model in (seeded_a, seeded_b):
        model.fit(X, y)
    assert_array_equal(seeded_a.coef_, seeded_b.coef_)

    # A different seed must give different coefficients.
    other_seed = UoI_L1Logistic(random_state=14)
    other_seed.fit(X, y)
    assert not np.array_equal(seeded_a.coef_, other_seed.coef_)

    # No seed given: two independent fits must differ as well.
    unseeded_a, unseeded_b = (UoI_L1Logistic() for _ in range(2))
    for model in (unseeded_a, unseeded_b):
        model.fit(X, y)
    assert not np.array_equal(unseeded_a.coef_, unseeded_b.coef_)
예제 #2
0
def test_fit_intercept():
    """Check that `fit_intercept` is passed through to the linear models.

    Both the selection and the estimation linear model held by the
    estimator are inspected, for `fit_intercept=True` and then `False`.
    """
    inner_models = ('_selection_lm', '_estimation_lm')

    with_intercept = UoI_L1Logistic(fit_intercept=True)
    for attr in inner_models:
        assert getattr(with_intercept, attr).fit_intercept

    without_intercept = UoI_L1Logistic(fit_intercept=False)
    for attr in inner_models:
        assert not getattr(without_intercept, attr).fit_intercept
예제 #3
0
def test_l1logistic_binary_multinomial():
    """Smoke-test binary fits with ``multi_class='multinomial'``.

    The model is fit once with the default intercept setting and once with
    ``fit_intercept=False``; the test passes when neither fit raises.
    """
    n_inf = 10
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=n_inf,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    # Options shared by both fits.
    common = dict(random_state=10, multi_class='multinomial')
    UoI_L1Logistic(**common).fit(X, y)
    UoI_L1Logistic(fit_intercept=False, **common).fit(X, y)
예제 #4
0
def test_reg_params():
    """Check the bound on the regularization parameter around zeroing.

    The ``C`` value from the first entry of `get_reg_params` is used as the
    bound: fitting slightly below it must leave every coefficient at zero,
    fitting slightly above it must leave at least one coefficient nonzero.
    """
    n_features = 20
    n_inf = 10
    n_classes = 5
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=101,
                                     n_classes=n_classes,
                                     n_informative=n_inf,
                                     n_features=n_features,
                                     shared_support=True)

    uoi_log = UoI_L1Logistic()
    uoi_log.output_dim = n_classes
    C_bound = uoi_log.get_reg_params(X, y)[0]['C']

    def fitted_coef(scale):
        """Fit an L1 model at ``scale * C_bound`` and return its coef_."""
        model = MaskedCoefLogisticRegression(penalty='l1',
                                             C=scale * C_bound,
                                             standardize=False,
                                             fit_intercept=True)
        model.fit(X, y)
        return model.coef_

    # Just below the bound: everything is zeroed out.
    assert_equal(fitted_coef(0.99), 0.)

    # Just above the bound: something survives.
    assert np.count_nonzero(fitted_coef(1.01)) > 0
예제 #5
0
def test_l1logistic_binary():
    """Exercise binary L1 Logistic fitting and prediction in UoI.

    Fits with and without an intercept, calls the prediction methods, checks
    that `score` matches the accuracy of `predict`, and requires the
    recovered support to agree with the true one on at least 80% of entries.
    """
    n_inf = 10
    X, y, w, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=n_inf,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    # The default-intercept fit only has to run without raising.
    UoI_L1Logistic(random_state=10).fit(X, y)

    l1log = UoI_L1Logistic(random_state=10, fit_intercept=False).fit(X, y)
    l1log.predict_proba(X)
    l1log.predict_log_proba(X)
    y_hat = l1log.predict(X)
    assert_equal(accuracy_score(y, y_hat), l1log.score(X, y))

    # Compare supports: 1 where a weight is nonzero, 0 elsewhere.
    true_support = np.sign(abs(w))
    found_support = np.sign(abs(l1log.coef_))
    assert (true_support == found_support).mean() >= .8
예제 #6
0
def test_l1logistic_bad_est_score():
    """Test that L1 Logistic raises an error when given a bad
    estimation_score value.

    The data are generated from a fixed seed and the labels contain two
    balanced classes, so that the expected ``ValueError`` is not attributable
    to degenerate single-class labels or to an unlucky random draw.
    """
    rs = np.random.RandomState(0)
    X = rs.randn(20, 5)
    # Alternate 0/1 labels: a well-formed binary problem.
    y = np.arange(20) % 2

    with pytest.raises(ValueError):
        UoI_L1Logistic(estimation_score='z', n_boots_sel=10,
                       n_boots_est=10).fit(X, y)
예제 #7
0
def test_l1logistic_binary():
    """Check binary coefficient recovery when `comm=MPI.COMM_WORLD` is used.

    The fitted coefficients must match the true weights in sign and be close
    to them within the absolute and relative tolerances of 0.5.
    """
    n_inf = 4
    X, y, w, _ = make_classification(n_samples=1000,
                                     random_state=6,
                                     n_informative=n_inf,
                                     n_features=6)

    estimator = UoI_L1Logistic(random_state=10, comm=MPI.COMM_WORLD)
    l1log = estimator.fit(X, y)
    assert_array_equal(np.sign(w), np.sign(l1log.coef_))
    assert_allclose(w, l1log.coef_, atol=.5, rtol=.5)
예제 #8
0
def test_l1logistic_sparse_input_no_center():
    """Check that fitting sparse data with `fit_intercept=True` raises.

    A ``ValueError`` is expected when the three-class model is fit on a
    sparse matrix while an intercept is requested.
    """
    rng = np.random.RandomState(17)
    X = sprand(10, 10, random_state=rng)
    classes = ['abc', 'de', 'fgh']
    # Draw a random string label for each of the 10 rows.
    label_idx = rng.randint(3, size=10)
    y = np.array(classes)[label_idx]

    with pytest.raises(ValueError):
        UoI_L1Logistic(fit_intercept=True).fit(X, y)
예제 #9
0
def test_l1logistic_intercept():
    """Check the `intercept_` attribute with and without intercept fitting.

    Without an intercept, `intercept_` must equal zero. With an intercept,
    the attribute only has to be accessible after fitting.
    """
    for use_intercept in (True, False):
        X, y, _, _ = make_classification(n_samples=100,
                                         random_state=11,
                                         n_features=4,
                                         w_scale=4.,
                                         include_intercept=use_intercept)
        l1log = UoI_L1Logistic(fit_intercept=use_intercept).fit(X, y)
        if use_intercept:
            # Bare attribute access: fails only if `intercept_` is missing.
            l1log.intercept_
        else:
            assert_array_equal(l1log.intercept_, 0.)
예제 #10
0
def test_estimation_score_usage():
    """Check that changing `estimation_score` changes the resulting scores.

    Each method is stored on the estimator as given, and the maximum of
    `scores_` after fitting must be distinct for every method.
    """
    methods = ('acc', 'log')
    X, y, _, _ = make_classification(n_samples=100,
                                     random_state=6,
                                     n_informative=2,
                                     n_features=6)

    # Collect best scores in a set so duplicates collapse.
    best_scores = set()
    for method in methods:
        l1log = UoI_L1Logistic(random_state=12, estimation_score=method)
        assert_equal(l1log.estimation_score, method)
        l1log.fit(X, y)
        best_scores.add(np.max(l1log.scores_))
    assert_equal(len(best_scores), len(methods))
예제 #11
0
def test_l1logistic_binary():
    """Test that binary L1 Logistic runs in the UoI framework.

    Fits a default model on column-normalized data, then checks that `coef_`
    has shape ``(1, n_features)`` and that the first `n_inf` columns carry
    some nonzero weight.
    """
    n_inf = 2
    X, y = make_classification(n_samples=100,
                               random_state=6,
                               n_informative=n_inf,
                               n_features=4,
                               n_repeated=0,
                               n_redundant=0)
    X = normalize(X, axis=0)
    l1log = UoI_L1Logistic().fit(X, y)
    # ensure shape conforms to sklearn convention
    assert l1log.coef_.shape == (1, 4)
    # check that we have weights on at least one of the informative features.
    # Sum the magnitudes, not the signed values: signed weights that cancel
    # would otherwise be reported as "no weight at all".
    # NOTE(review): this presumes the informative features occupy the first
    # n_inf columns; if make_classification shuffles columns by default,
    # that does not hold -- confirm against the generator in use.
    assert np.sum(np.abs(l1log.coef_[:, :n_inf])) > 0.0
예제 #12
0
def test_l1logistic_binary():
    """Check binary coefficient recovery for each estimation score.

    For both the 'acc' and 'log' scores, at least 80% of coefficient signs
    must match the true weights, and the coefficients must be close to the
    true weights within absolute and relative tolerances of 0.5.
    """
    n_inf = 4
    X, y, w, _ = make_classification(n_samples=2000,
                                     random_state=6,
                                     n_informative=n_inf,
                                     n_features=10,
                                     w_scale=4.,
                                     include_intercept=True)

    for method in ('acc', 'log'):
        estimator = UoI_L1Logistic(random_state=10, estimation_score=method)
        l1log = estimator.fit(X, y)
        sign_agreement = (np.sign(w) == np.sign(l1log.coef_)).mean()
        assert sign_agreement >= .8
        assert_allclose(w, l1log.coef_, rtol=.5, atol=.5)
예제 #13
0
def test_l1logistic_multiclass_not_shared():
    """Exercise multiclass L1 Logistic in UoI with `shared_support=False`.

    Checks that `score` matches the accuracy of `predict` and that the
    recovered support agrees with the true one on at least 70% of entries.
    """
    n_features = 20
    n_inf = 10
    X, y, w, _ = make_classification(n_samples=400,
                                     random_state=10,
                                     n_classes=5,
                                     n_informative=n_inf,
                                     n_features=n_features,
                                     shared_support=False,
                                     w_scale=4.)
    l1log = UoI_L1Logistic(shared_support=False).fit(X, y)
    l1log.predict_log_proba(X)
    y_hat = l1log.predict(X)
    assert_equal(accuracy_score(y, y_hat), l1log.score(X, y))

    # Compare supports: 1 where a weight is nonzero, 0 elsewhere.
    true_support = np.sign(abs(w))
    found_support = np.sign(abs(l1log.coef_))
    assert (true_support == found_support).mean() >= .7
예제 #14
0
def test_l1logistic_multiclass():
    """Check multiclass coefficient recovery with a shared support.

    The model is fit with `comm=MPI.COMM_WORLD`; the fitted coefficients must
    match the true weights in sign and lie within 0.5 (absolute) of them.
    """
    n_features = 4
    n_inf = 3
    X, y, w, _ = make_classification(n_samples=1000,
                                     random_state=6,
                                     n_classes=3,
                                     n_informative=n_inf,
                                     n_features=n_features,
                                     shared_support=True)
    l1log = UoI_L1Logistic(comm=MPI.COMM_WORLD).fit(X, y)

    # Dump true and fitted weights, after a blank line, for inspection.
    print()
    for weights in (w, l1log.coef_):
        print(weights)

    assert_array_equal(np.sign(w), np.sign(l1log.coef_))
    assert_allclose(w, l1log.coef_, atol=.5)
예제 #15
0
def test_l1logistic_binary_strings():
    """Check that binary L1 Logistic in UoI works with string labels.

    The generated labels are mapped to 'a'/'b' before fitting, and every
    predicted label must be one of those two strings.
    """
    n_inf = 10
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=n_inf,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    # Replace the generated labels with their string counterparts.
    classes = ['a', 'b']
    encoder = LabelEncoder()
    encoder.fit(classes)
    y = encoder.inverse_transform(y)

    l1log = UoI_L1Logistic(random_state=10).fit(X, y)
    predicted = l1log.predict(X)
    assert set(predicted).issubset(set(classes))
예제 #16
0
def test_l1logistic_sparse_input():
    """Check that multiclass L1 Logistic accepts sparse matrix inputs.

    The model is fit without an intercept on a sparse matrix with three
    string classes; every predicted label must be one of those classes.
    """
    rng = np.random.RandomState(17)
    X = sprand(100, 100, random_state=rng)
    classes = ['abc', 'de', 'fgh']
    # Draw a random string label for each of the 100 rows.
    label_idx = rng.randint(3, size=100)
    y = np.array(classes)[label_idx]

    estimator = UoI_L1Logistic(fit_intercept=False,
                               random_state=rng,
                               n_boots_sel=4,
                               n_boots_est=4,
                               n_C=7)
    l1log = estimator.fit(X, y)

    predicted = l1log.predict(X)
    assert set(predicted).issubset(set(classes))
예제 #17
0
def test_estimation_score_usage():
    """Check that each `estimation_score` yields a distinct `scores_` array.

    Every method is stored on the estimator as given, and after fitting the
    stacked `scores_` arrays must contain one unique row per method.
    """
    methods = ('acc', 'log', 'BIC', 'AIC', 'AICc')
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=5,
                                     n_features=10)

    # Settings shared by every estimator; only the score method varies.
    fixed = dict(random_state=12, tol=1e-2, n_boots_sel=24, n_boots_est=24)
    per_method = []
    for method in methods:
        l1log = UoI_L1Logistic(estimation_score=method, **fixed)
        assert_equal(l1log.estimation_score, method)
        l1log.fit(X, y)
        per_method.append(l1log.scores_)

    stacked = np.stack(per_method)
    n_distinct = len(np.unique(stacked, axis=0))
    assert_equal(n_distinct, len(methods))
예제 #18
0
def test_l1logistic_multiclass_strings():
    """Check that multiclass L1 Logistic in UoI works with string labels.

    Data with five classes and a shared support are generated, the labels
    are mapped to 'a'..'e' before fitting, and every predicted label must be
    one of those strings.
    """
    n_features = 20
    n_inf = 10
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=10,
                                     n_classes=5,
                                     n_informative=n_inf,
                                     n_features=n_features,
                                     shared_support=True,
                                     w_scale=4.)

    # Replace the generated labels with their string counterparts.
    classes = ['a', 'b', 'c', 'd', 'e']
    encoder = LabelEncoder()
    encoder.fit(classes)
    y = encoder.inverse_transform(y)

    l1log = UoI_L1Logistic(random_state=10).fit(X, y)
    predicted = l1log.predict(X)
    assert set(predicted).issubset(set(classes))
예제 #19
0
def test_l1logistic_no_ovr():
    """Check that ``multi_class='ovr'`` is rejected at construction time.

    Building the estimator with that option must raise a ``ValueError``.
    """
    rejected_options = dict(multi_class='ovr')
    with pytest.raises(ValueError):
        UoI_L1Logistic(**rejected_options)