def test_reg_params():
    """Check that the largest regularization bound is tight.

    The first entry returned by ``UoI_L1Logistic.get_reg_params`` supplies a
    value of ``C``. An L1-penalized fit with ``C`` slightly below it must
    produce all-zero coefficients, while a fit with ``C`` slightly above it
    must leave at least one coefficient non-zero.
    """
    n_classes = 5
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=101,
                                     n_classes=n_classes,
                                     n_informative=10,
                                     n_features=20,
                                     shared_support=True)

    uoi = UoI_L1Logistic()
    uoi.output_dim = n_classes
    C_bound = uoi.get_reg_params(X, y)[0]['C']

    def fitted_coef(scale):
        # Fit an unstandardized L1 model at `scale` times the bound.
        model = MaskedCoefLogisticRegression(penalty='l1',
                                             C=scale * C_bound,
                                             standardize=False,
                                             fit_intercept=True)
        model.fit(X, y)
        return model.coef_

    # Just below the bound: every coefficient is driven to zero.
    assert_equal(fitted_coef(0.99), 0.)

    # Just above the bound: some coefficient survives.
    assert np.count_nonzero(fitted_coef(1.01)) > 0
def test_set_random_state():
    """Check how ``random_state`` affects the fitted coefficients.

    Two estimators with the same seed must yield identical ``coef_``; two
    with different seeds, or with no seed at all, must not.
    """
    X, y, _, _ = make_classification(n_samples=100,
                                     random_state=60,
                                     n_informative=4,
                                     n_features=5,
                                     w_scale=4.)

    # Identical seeds give identical fits.
    seeded_a = UoI_L1Logistic(random_state=13)
    seeded_b = UoI_L1Logistic(random_state=13)
    seeded_a.fit(X, y)
    seeded_b.fit(X, y)
    assert_array_equal(seeded_a.coef_, seeded_b.coef_)

    # A different seed gives a different fit.
    reseeded = UoI_L1Logistic(random_state=14)
    reseeded.fit(X, y)
    assert not np.array_equal(seeded_a.coef_, reseeded.coef_)

    # With no seed set, two runs are expected to differ as well.
    unseeded_a = UoI_L1Logistic()
    unseeded_b = UoI_L1Logistic()
    unseeded_a.fit(X, y)
    unseeded_b.fit(X, y)
    assert not np.array_equal(unseeded_a.coef_, unseeded_b.coef_)
def test_masked_logistic_standardize():
    """Check coefficient masking when ``standardize=True``.

    For every combination of shared/unshared support, 2 or 3 classes,
    with/without intercept and both penalties, each position excluded by
    ``coef_mask`` must have an exactly-zero fitted coefficient.
    """
    for shared_support in (True, False):
        for n_classes in (2, 3):
            for intercept in (True, False):
                X, y, w, _ = make_classification(
                    n_samples=200,
                    random_state=10,
                    n_classes=n_classes,
                    n_informative=10,
                    n_features=20,
                    shared_support=shared_support,
                    include_intercept=intercept,
                    w_scale=4.)

                # The mask keeps exactly the non-zero generating weights.
                mask = np.squeeze(np.not_equal(w, 0))
                masked_out = set(np.flatnonzero(np.equal(mask, 0)).tolist())

                for penalty in ('l1', 'l2'):
                    model = MaskedCoefLogisticRegression(
                        penalty=penalty,
                        C=10.,
                        warm_start=True,
                        fit_intercept=intercept,
                        standardize=True)
                    model.fit(X, y, coef_mask=mask)

                    zeroed = set(
                        np.flatnonzero(np.equal(model.coef_, 0.)).tolist())
                    assert masked_out <= zeroed

                    # Fit again on the same estimator; only required to run
                    # (presumably exercises the warm_start=True refit path).
                    model.fit(X, y, coef_mask=mask)
def test_l1logistic_binary_mpi():
    """Test that binary L1 Logistic runs in the UoI framework with MPI.

    The estimator is handed ``MPI.COMM_WORLD`` as its communicator, and the
    recovered support (zero vs. non-zero pattern of ``coef_``) must agree
    with the generating weights on at least 70% of the entries.
    """
    # NOTE: this test used to be named ``test_l1logistic_binary``, the same
    # name as a later test in this module. The later ``def`` rebound the
    # name, so this one was never collected by pytest. The distinct name
    # lets both tests run.
    X, y, w, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=10,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    l1log = UoI_L1Logistic(random_state=10, comm=MPI.COMM_WORLD).fit(X, y)

    # sign(|x|) is 0 for zero entries and 1 otherwise, so this compares the
    # supports rather than the coefficient values.
    support_agreement = np.sign(abs(w)) == np.sign(abs(l1log.coef_))
    assert support_agreement.mean() >= .7
def test_l1logistic_multiclass():
    """Check multiclass UoI L1 Logistic with a support shared by all classes.

    Fits a 5-class problem, passing ``MPI.COMM_WORLD`` as the communicator,
    and requires the zero/non-zero pattern of ``coef_`` to match the
    generating weights on at least 80% of the entries.
    """
    X, y, w, _ = make_classification(n_samples=200,
                                     random_state=10,
                                     n_classes=5,
                                     n_informative=10,
                                     n_features=20,
                                     shared_support=True,
                                     w_scale=4.)

    model = UoI_L1Logistic(comm=MPI.COMM_WORLD).fit(X, y)

    # sign(|x|) maps zeros to 0 and everything else to 1.
    true_support = np.sign(np.abs(w))
    found_support = np.sign(np.abs(model.coef_))
    assert (true_support == found_support).mean() >= .8
def test_l1logistic_intercept():
    """Test that binary L1 Logistic handles ``fit_intercept`` when run.

    With ``fit_intercept=False`` the fitted ``intercept_`` must be exactly
    zero. With ``fit_intercept=True`` the estimator must expose an
    ``intercept_`` attribute after fitting.
    """
    for fi in [True, False]:
        X, y, _, _ = make_classification(n_samples=100,
                                         random_state=11,
                                         n_features=4,
                                         w_scale=4.,
                                         include_intercept=fi)
        l1log = UoI_L1Logistic(fit_intercept=fi,
                               n_boots_sel=3,
                               n_boots_est=3).fit(X, y)
        if fi:
            # Previously a bare ``l1log.intercept_`` expression statement,
            # which only checked that the attribute existed. The check is
            # now an explicit assertion.
            assert hasattr(l1log, 'intercept_')
        else:
            assert_array_equal(l1log.intercept_, 0.)
def test_l1logistic_binary_multinomial():
    """Smoke-test binary UoI L1 Logistic with ``multi_class='multinomial'``.

    Fits once with the default intercept setting and once with
    ``fit_intercept=False``; the test passes if neither fit raises.
    """
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=10,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    # First the default configuration, then the one without an intercept.
    for extra_kwargs in ({}, {'fit_intercept': False}):
        UoI_L1Logistic(random_state=10,
                       multi_class='multinomial',
                       **extra_kwargs).fit(X, y)
def test_l1logistic_binary():
    """Check binary UoI L1 Logistic end to end.

    Runs a fit with the default intercept setting, then refits with
    ``fit_intercept=False`` and exercises the prediction API on that model:
    ``score`` must equal the accuracy of ``predict``, and the zero/non-zero
    pattern of ``coef_`` must match the generating weights on at least 80%
    of the entries.
    """
    X, y, w, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=10,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    # The default-intercept fit only has to run; its result is not inspected.
    UoI_L1Logistic(random_state=10).fit(X, y)

    model = UoI_L1Logistic(random_state=10, fit_intercept=False).fit(X, y)

    # The probability outputs only have to be computable.
    model.predict_proba(X)
    model.predict_log_proba(X)

    predictions = model.predict(X)
    assert_equal(accuracy_score(y, predictions), model.score(X, y))

    support_agreement = np.sign(np.abs(w)) == np.sign(np.abs(model.coef_))
    assert support_agreement.mean() >= .8
def test_normalization_by_samples():
    """Check that ``coef_`` does not depend directly on the sample count.

    Fitting on the data set repeated three times must give the same
    coefficients (up to ``assert_allclose`` tolerance) as fitting on the
    original data, for 2 and 3 classes and for both penalties.
    """
    n_features = 20
    for n_classes in (2, 3):
        X, y, _, _ = make_classification(n_samples=200,
                                         random_state=10,
                                         n_classes=n_classes,
                                         n_informative=n_features,
                                         n_features=n_features,
                                         w_scale=4.)
        # Same samples, stacked three times over.
        X_tripled = np.tile(X, (3, 1))
        y_tripled = np.tile(y, 3)

        for penalty in ('l1', 'l2'):
            single = MaskedCoefLogisticRegression(penalty=penalty, C=1e2)
            single.fit(X, y)

            tripled = MaskedCoefLogisticRegression(penalty=penalty, C=1e2)
            tripled.fit(X_tripled, y_tripled)

            assert_allclose(single.coef_, tripled.coef_)
def test_l1logistic_multiclass_not_shared():
    """Check multiclass UoI L1 Logistic when classes do NOT share a support.

    Both the data and the estimator use ``shared_support=False``. ``score``
    must equal the accuracy of ``predict``, and the zero/non-zero pattern of
    ``coef_`` must match the generating weights on at least 70% of the
    entries.
    """
    X, y, w, _ = make_classification(n_samples=400,
                                     random_state=10,
                                     n_classes=5,
                                     n_informative=10,
                                     n_features=20,
                                     shared_support=False,
                                     w_scale=4.)

    model = UoI_L1Logistic(shared_support=False).fit(X, y)

    # The log-probabilities only have to be computable.
    model.predict_log_proba(X)

    predictions = model.predict(X)
    assert_equal(accuracy_score(y, predictions), model.score(X, y))

    support_agreement = np.sign(np.abs(w)) == np.sign(np.abs(model.coef_))
    assert support_agreement.mean() >= .7
def test_l1logistic_binary_strings():
    """Check binary UoI L1 Logistic with string class labels.

    The integer targets are mapped to the labels ``'a'``/``'b'`` before
    fitting; every predicted label must be one of those strings.
    """
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=10,
                                     n_features=20,
                                     w_scale=4.,
                                     include_intercept=True)

    classes = ['a', 'b']
    encoder = LabelEncoder().fit(classes)
    y_labels = encoder.inverse_transform(y)

    model = UoI_L1Logistic(random_state=10).fit(X, y_labels)
    predicted = model.predict(X)

    assert set(predicted) <= set(classes)
def test_estimation_score_usage():
    """Check that the estimation score of UoI L1 Logistic can be changed.

    Each supported ``estimation_score`` value must be stored on the
    estimator, and the resulting ``scores_`` arrays must be pairwise
    distinct across methods.
    """
    methods = ('acc', 'log', 'BIC', 'AIC', 'AICc')
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=6,
                                     n_informative=5,
                                     n_features=10)

    per_method_scores = []
    for method in methods:
        model = UoI_L1Logistic(random_state=12,
                               estimation_score=method,
                               tol=1e-2,
                               n_boots_sel=24,
                               n_boots_est=24)
        assert_equal(model.estimation_score, method)
        model.fit(X, y)
        per_method_scores.append(model.scores_)

    # One row per method; the number of unique rows must equal the number
    # of methods.
    stacked = np.stack(per_method_scores)
    distinct = np.unique(stacked, axis=0)
    assert_equal(len(distinct), len(methods))
def test_l1logistic_multiclass_strings():
    """Check multiclass UoI L1 Logistic with string class labels.

    The 5-class integer targets (generated with a shared support) are mapped
    to the labels ``'a'``..``'e'`` before fitting; every predicted label
    must be one of those strings.
    """
    X, y, _, _ = make_classification(n_samples=200,
                                     random_state=10,
                                     n_classes=5,
                                     n_informative=10,
                                     n_features=20,
                                     shared_support=True,
                                     w_scale=4.)

    classes = ['a', 'b', 'c', 'd', 'e']
    encoder = LabelEncoder().fit(classes)
    y_labels = encoder.inverse_transform(y)

    model = UoI_L1Logistic(random_state=10).fit(X, y_labels)
    predicted = model.predict(X)

    assert set(predicted) <= set(classes)