def test_set_random_state():
    """Check that `random_state` governs the reproducibility of a fit.

    Models built with the same seed must yield identical coefficients;
    models built with different seeds, or with no seed at all, are
    asserted to yield different coefficients.
    """
    X, y, w, b = make_classification(
        n_samples=100,
        random_state=60,
        n_informative=4,
        n_features=5,
        w_scale=4.)

    # Case 1: identical seeds -> identical coefficients.
    model_a = UoI_L1Logistic(random_state=13)
    model_b = UoI_L1Logistic(random_state=13)
    model_a.fit(X, y)
    model_b.fit(X, y)
    assert_array_equal(model_a.coef_, model_b.coef_)

    # Case 2: the second model is rebuilt with another seed; it is
    # compared against the seed-13 fit from case 1.
    model_b = UoI_L1Logistic(random_state=14)
    model_b.fit(X, y)
    seeded_match = np.array_equal(model_a.coef_, model_b.coef_)
    assert not seeded_match

    # Case 3: neither model receives a seed.
    model_a = UoI_L1Logistic()
    model_b = UoI_L1Logistic()
    model_a.fit(X, y)
    model_b.fit(X, y)
    unseeded_match = np.array_equal(model_a.coef_, model_b.coef_)
    assert not unseeded_match
def test_fit_intercept():
    """Check that `fit_intercept` is passed through to the linear models.

    Both the selection and the estimation model held by the estimator
    must carry the same truthiness as the flag given to the constructor.
    """
    for flag in (True, False):
        lr = UoI_L1Logistic(fit_intercept=flag)
        assert bool(lr._selection_lm.fit_intercept) is flag
        assert bool(lr._estimation_lm.fit_intercept) is flag
def test_l1logistic_binary_multinomial():
    """Smoke test: binary data fit with multi_class='multinomial'.

    The model is fit once with the constructor's default intercept
    setting and once with `fit_intercept=False`; the test passes as long
    as neither fit raises.
    """
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=6,
        n_informative=10,
        n_features=20,
        w_scale=4.,
        include_intercept=True)

    with_default = UoI_L1Logistic(random_state=10,
                                  multi_class='multinomial')
    with_default.fit(X, y)

    without_intercept = UoI_L1Logistic(random_state=10,
                                       fit_intercept=False,
                                       multi_class='multinomial')
    without_intercept.fit(X, y)
def test_reg_params():
    """Check the boundary behaviour of the first regularization value.

    The `C` of the first entry returned by `get_reg_params` is treated as
    a threshold: an L1 logistic fit with a slightly smaller `C` must have
    all-zero coefficients, and a fit with a slightly larger `C` must have
    at least one nonzero coefficient.
    """
    n_classes = 5
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=101,
        n_classes=n_classes,
        n_informative=10,
        n_features=20,
        shared_support=True)

    uoi_log = UoI_L1Logistic()
    # output_dim is set by hand because the estimator is never fit here.
    uoi_log.output_dim = n_classes
    C_bound = uoi_log.get_reg_params(X, y)[0]['C']

    # Settings common to both masked logistic regressions below.
    common = dict(penalty='l1', standardize=False, fit_intercept=True)

    # Just below the bound: every coefficient must be zero.
    below = MaskedCoefLogisticRegression(C=0.99 * C_bound, **common)
    below.fit(X, y)
    assert_equal(below.coef_, 0.)

    # Just above the bound: at least one coefficient must be nonzero.
    above = MaskedCoefLogisticRegression(C=1.01 * C_bound, **common)
    above.fit(X, y)
    assert np.count_nonzero(above.coef_) > 0
def test_l1logistic_binary():
    """Check a binary fit: prediction API, score, and recovered support.

    Verifies that `score` agrees with `accuracy_score` on the model's own
    predictions and that the zero/nonzero pattern of the fitted
    coefficients matches that of the true weights on at least 80% of
    the entries.
    """
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=6,
        n_informative=10,
        n_features=20,
        w_scale=4.,
        include_intercept=True)

    # Fit with the default intercept setting; the result is not inspected.
    UoI_L1Logistic(random_state=10).fit(X, y)

    model = UoI_L1Logistic(random_state=10, fit_intercept=False).fit(X, y)

    # The probability methods only need to run without raising.
    model.predict_proba(X)
    model.predict_log_proba(X)

    y_hat = model.predict(X)
    assert_equal(accuracy_score(y, y_hat), model.score(X, y))

    # sign(abs(.)) is 0 for zero entries and 1 otherwise, i.e. the support.
    true_support = np.sign(abs(w))
    fit_support = np.sign(abs(model.coef_))
    agreement = (true_support == fit_support).mean()
    assert agreement >= .8
def test_l1logistic_bad_est_score():
    """Check that an unrecognized `estimation_score` leads to ValueError."""
    X = np.random.randn(20, 5)
    # NOTE(review): every label is 1, so only one class is present --
    # confirm the ValueError comes from the bad score, not the labels.
    y = np.ones(20)

    with pytest.raises(ValueError):
        bad_model = UoI_L1Logistic(estimation_score='z',
                                   n_boots_sel=10,
                                   n_boots_est=10)
        bad_model.fit(X, y)
def test_l1logistic_binary():
    """Check a binary fit run with the `MPI.COMM_WORLD` communicator.

    The fitted coefficients must have the same signs as the true weights
    and be close to them within the given tolerances.
    """
    X, y, w, b = make_classification(
        n_samples=1000,
        random_state=6,
        n_informative=4,
        n_features=6)

    model = UoI_L1Logistic(random_state=10, comm=MPI.COMM_WORLD)
    model = model.fit(X, y)

    true_signs = np.sign(w)
    fit_signs = np.sign(model.coef_)
    assert_array_equal(true_signs, fit_signs)
    assert_allclose(w, model.coef_, atol=.5, rtol=.5)
def test_l1logistic_sparse_input_no_center():
    """Check that sparse input combined with `fit_intercept=True` raises.

    A ValueError is expected when fitting a 10x10 `sprand` matrix with
    three string-valued classes.
    """
    rng = np.random.RandomState(17)
    # Draw order matters for reproducibility: X first, then the labels.
    X = sprand(10, 10, random_state=rng)
    labels = np.array(['abc', 'de', 'fgh'])
    picks = rng.randint(3, size=10)
    y = labels[picks]

    with pytest.raises(ValueError):
        model = UoI_L1Logistic(fit_intercept=True)
        model.fit(X, y)
def test_l1logistic_intercept():
    """Check the `intercept_` attribute with and without an intercept.

    With `fit_intercept=False` the intercept must be exactly zero; with
    `fit_intercept=True` the attribute only has to be accessible.
    """
    for fi in [True, False]:
        X, y, w, b = make_classification(
            n_samples=100,
            random_state=11,
            n_features=4,
            w_scale=4.,
            include_intercept=fi)
        model = UoI_L1Logistic(fit_intercept=fi).fit(X, y)

        if fi:
            # Bare attribute access: raises if `intercept_` is missing.
            model.intercept_
        else:
            assert_array_equal(model.intercept_, 0.)
def test_estimation_score_usage():
    """Check that changing `estimation_score` changes the best score.

    For each method the constructor argument must be stored unchanged on
    the estimator, and the maximum of `scores_` after fitting must be
    different for every method.
    """
    methods = ('acc', 'log')
    X, y, w, b = make_classification(
        n_samples=100,
        random_state=6,
        n_informative=2,
        n_features=6)

    best_scores = []
    for method in methods:
        model = UoI_L1Logistic(random_state=12, estimation_score=method)
        assert_equal(model.estimation_score, method)
        model.fit(X, y)
        best_scores.append(np.max(model.scores_))

    # One distinct best score per method.
    n_distinct = len(set(best_scores))
    assert_equal(n_distinct, len(methods))
def test_l1logistic_binary():
    """Check the coefficient shape and weights of a binary fit.

    The data are normalized along axis 0 before fitting with default
    estimator settings.
    """
    n_inf = 2
    X, y = make_classification(
        n_samples=100,
        random_state=6,
        n_informative=n_inf,
        n_features=4,
        n_repeated=0,
        n_redundant=0)
    X = normalize(X, axis=0)

    model = UoI_L1Logistic().fit(X, y)

    # Binary problems use a single coefficient row: (1, n_features).
    assert model.coef_.shape == (1, 4)

    # The summed weight over the first n_inf columns must be nonzero.
    # NOTE(review): assumes those columns are the informative ones --
    # confirm against the settings of make_classification.
    leading_weight = np.sum(model.coef_[:, :n_inf])
    assert np.abs(leading_weight) > 0.0
def test_l1logistic_binary():
    """Check binary coefficient recovery for the 'acc' and 'log' scores.

    For each estimation score, at least 80% of the fitted coefficient
    signs must match the true weights, and the coefficients must be close
    to the true weights within the given tolerances.
    """
    X, y, w, b = make_classification(
        n_samples=2000,
        random_state=6,
        n_informative=4,
        n_features=10,
        w_scale=4.,
        include_intercept=True)
    true_signs = np.sign(w)

    for method in ('acc', 'log'):
        model = UoI_L1Logistic(random_state=10, estimation_score=method)
        model = model.fit(X, y)

        sign_agreement = (true_signs == np.sign(model.coef_)).mean()
        assert sign_agreement >= .8
        assert_allclose(w, model.coef_, rtol=.5, atol=.5)
def test_l1logistic_multiclass_not_shared():
    """Check a multiclass fit when classes do NOT share a support.

    Both the data and the estimator are created with
    `shared_support=False`. The test verifies that `score` agrees with
    `accuracy_score` on the model's own predictions and that the
    zero/nonzero pattern of the fitted coefficients matches the true
    weights on at least 70% of the entries.
    """
    X, y, w, b = make_classification(
        n_samples=400,
        random_state=10,
        n_classes=5,
        n_informative=10,
        n_features=20,
        shared_support=False,
        w_scale=4.)

    model = UoI_L1Logistic(shared_support=False).fit(X, y)

    # Only needs to run without raising.
    model.predict_log_proba(X)

    y_hat = model.predict(X)
    assert_equal(accuracy_score(y, y_hat), model.score(X, y))

    # sign(abs(.)) is 0 for zero entries and 1 otherwise, i.e. the support.
    true_support = np.sign(abs(w))
    fit_support = np.sign(abs(model.coef_))
    agreement = (true_support == fit_support).mean()
    assert agreement >= .7
def test_l1logistic_multiclass():
    """Check a shared-support multiclass fit run with `MPI.COMM_WORLD`.

    The fitted coefficients must have the same signs as the true weights
    and lie within an absolute tolerance of 0.5 of them. The true and
    fitted weights are printed for inspection.
    """
    X, y, w, b = make_classification(
        n_samples=1000,
        random_state=6,
        n_classes=3,
        n_informative=3,
        n_features=4,
        shared_support=True)

    model = UoI_L1Logistic(comm=MPI.COMM_WORLD)
    model = model.fit(X, y)

    # Diagnostic output: blank line, true weights, fitted weights.
    print()
    print(w)
    print(model.coef_)

    true_signs = np.sign(w)
    fit_signs = np.sign(model.coef_)
    assert_array_equal(true_signs, fit_signs)
    assert_allclose(w, model.coef_, atol=.5)
def test_l1logistic_binary_strings():
    """Check a binary fit when the labels are strings.

    The generated labels are converted to 'a'/'b' through a
    `LabelEncoder`, and every predicted label must be one of those two
    strings.
    """
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=6,
        n_informative=10,
        n_features=20,
        w_scale=4.,
        include_intercept=True)

    classes = ['a', 'b']
    encoder = LabelEncoder()
    encoder.fit(classes)
    # Replace the generated labels with their string counterparts.
    y = encoder.inverse_transform(y)

    model = UoI_L1Logistic(random_state=10).fit(X, y)
    predicted = set(model.predict(X))
    assert predicted <= set(classes)
def test_l1logistic_sparse_input():
    """Check that a multiclass fit accepts a sparse design matrix.

    A 100x100 `sprand` matrix with three string-valued classes is fit
    with `fit_intercept=False`; every predicted label must be one of the
    known classes.
    """
    rng = np.random.RandomState(17)
    # Draw order matters for reproducibility: X first, then the labels.
    X = sprand(100, 100, random_state=rng)
    classes = ['abc', 'de', 'fgh']
    picks = rng.randint(3, size=100)
    y = np.array(classes)[picks]

    # The same RandomState instance is handed on to the estimator.
    model = UoI_L1Logistic(
        fit_intercept=False,
        random_state=rng,
        n_boots_sel=4,
        n_boots_est=4,
        n_C=7,
    )
    model = model.fit(X, y)

    predicted = set(model.predict(X))
    assert predicted <= set(classes)
def test_estimation_score_usage():
    """Check that each `estimation_score` yields a distinct `scores_`.

    For each of the five methods the constructor argument must be stored
    unchanged on the estimator, and after fitting the stacked `scores_`
    arrays must contain one unique row per method.
    """
    methods = ('acc', 'log', 'BIC', 'AIC', 'AICc')
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=6,
        n_informative=5,
        n_features=10)

    per_method = []
    for method in methods:
        model = UoI_L1Logistic(
            random_state=12,
            estimation_score=method,
            tol=1e-2,
            n_boots_sel=24,
            n_boots_est=24)
        assert_equal(model.estimation_score, method)
        model.fit(X, y)
        per_method.append(model.scores_)

    # One row per method; duplicate rows would mean two methods agreed.
    stacked = np.stack(per_method)
    distinct_rows = np.unique(stacked, axis=0)
    assert_equal(len(distinct_rows), len(methods))
def test_l1logistic_multiclass_strings():
    """Check a shared-support multiclass fit when the labels are strings.

    The generated labels are converted to 'a'..'e' through a
    `LabelEncoder`, and every predicted label must be one of those five
    strings.
    """
    X, y, w, b = make_classification(
        n_samples=200,
        random_state=10,
        n_classes=5,
        n_informative=10,
        n_features=20,
        shared_support=True,
        w_scale=4.)

    classes = ['a', 'b', 'c', 'd', 'e']
    encoder = LabelEncoder()
    encoder.fit(classes)
    # Replace the generated labels with their string counterparts.
    y = encoder.inverse_transform(y)

    model = UoI_L1Logistic(random_state=10).fit(X, y)
    predicted = set(model.predict(X))
    assert predicted <= set(classes)
def test_l1logistic_no_ovr():
    """Check that constructing the model with multi_class='ovr' raises.

    The ValueError is expected from the constructor itself; `fit` is
    never called.
    """
    unsupported = dict(multi_class='ovr')
    with pytest.raises(ValueError):
        UoI_L1Logistic(**unsupported)