def test_fit_intercept():
    """Check the default, assignment and validation of ``fit_intercept``.

    The option must default to ``True``, accept booleans, and reject every
    non-boolean value with a ``ValueError`` carrying the expected message,
    both through attribute assignment and through the constructor.
    """
    # NOTE(review): the expected text says "C=" rather than "fit_intercept=";
    # presumably it mirrors the library's message verbatim -- confirm.
    message_template = "fit_intercept must be True or False; got (C=%r)"
    rejected_values = (0, 1, -1, complex(1.0, 1.0), "1.0", "true")

    model = Classifier()
    assert isinstance(model.fit_intercept, bool)
    assert model.fit_intercept is True

    model.fit_intercept = False
    assert isinstance(model.fit_intercept, bool)
    assert model.fit_intercept is False

    # Invalid values assigned on an existing instance
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            model.fit_intercept = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The same invalid values handed to the constructor
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            Classifier(fit_intercept=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "fit_intercept", True)
    assert getattr(model, "fit_intercept") is True
def test_estimator():
    """Check the default, accepted names and validation of ``estimator``.

    Every name listed in ``Classifier._estimators`` must be assignable, and
    an unknown name must raise a ``ValueError`` with the expected message,
    both through attribute assignment and through the constructor.
    """
    model = Classifier()
    assert model.estimator == "erm"

    for name in Classifier._estimators:
        model.estimator = name
        assert model.estimator == name

    unknown = "stuff"
    expected_message = "estimator must be one of %r; got (estimator=%r)" % (
        Classifier._estimators,
        unknown,
    )

    # Unknown name assigned on an existing instance
    with pytest.raises(ValueError) as caught:
        model.estimator = unknown
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # Unknown name handed to the constructor
    with pytest.raises(ValueError) as caught:
        Classifier(estimator=unknown)
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # The attribute must also work through setattr / getattr
    setattr(model, "estimator", "mom")
    assert getattr(model, "estimator") == "mom"
def run_algorithm(
    data,
    algo,
    rep,
    col_try,
    col_algo,
    col_train_loss,
    col_test_loss,
    col_train_acc,
    col_test_acc,
    col_fit_time,
):
    """Fit one algorithm for one repetition and append its results.

    Parameters
    ----------
    data
        Sequence unpacked as ``(X_train, X_test, y_train, y_test)``.
    algo
        Object exposing ``name``, ``solver`` and ``estimator`` attributes.
    rep
        Repetition identifier; passed to ``announce`` and appended to
        ``col_try``.
    col_try, col_algo, col_train_loss, col_test_loss, col_train_acc,
    col_test_acc, col_fit_time
        Output columns; one value is appended to each of them.

    Notes
    -----
    ``tol``, ``max_iter``, ``loss``, ``fit_intercept``, ``step_size``,
    ``penalty``, ``l1_ratio`` and ``lamda`` are not defined here and are
    resolved from the enclosing scope.
    """
    X_train, X_test, y_train, y_test = data
    n_train = len(y_train)
    announce(rep, algo.name, "running")

    settings = {
        "tol": tol,
        "max_iter": max_iter,
        "solver": algo.solver,
        "loss": loss,
        "estimator": algo.estimator,
        "fit_intercept": fit_intercept,
        "step_size": step_size,
        "penalty": penalty,
        "l1_ratio": l1_ratio,
        # The regularization strength is expressed through lamda and the
        # number of training samples
        "C": 1 / (n_train * lamda),
    }
    clf = Classifier(**settings)
    clf.fit(X_train, y_train, dummy_first_step=True)
    announce(rep, algo.name, "fitted")

    col_try.append(rep)
    col_algo.append(algo.name)

    # Evaluate and store the metrics one after the other, in column order
    for column, metric, features, labels in (
        (col_train_loss, objective, X_train, y_train),
        (col_test_loss, objective, X_test, y_test),
        (col_train_acc, accuracy, X_train, y_train),
        (col_test_acc, accuracy, X_test, y_test),
    ):
        column.append(metric(features, labels, clf))

    col_fit_time.append(clf.fit_time())
def test_tol():
    """Check the default, assignment and validation of ``tol``.

    The option must default to ``1e-4``, be stored as a float, and reject
    invalid values with a ``ValueError`` carrying the expected message, both
    through attribute assignment and through the constructor.
    """
    message_template = (
        "Tolerance for stopping criteria must be non negative; got (tol=%r)"
    )
    # 0.0 is not part of the rejected values (it is commented out of the list)
    rejected_values = (-1, complex(1.0, 1.0), "1.0")  # , 0.0

    model = Classifier()
    assert isinstance(model.tol, float)
    assert model.tol == 1e-4

    model.tol = 3.14e-3
    assert isinstance(model.tol, float)
    assert model.tol == 3.14e-3

    # Invalid values assigned on an existing instance
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            model.tol = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The same invalid values handed to the constructor
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            Classifier(tol=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "tol", 3.14)
    assert getattr(model, "tol") == 3.14
def test_penalty():
    """Check the default, accepted names and validation of ``penalty``.

    Every name listed in ``Classifier._penalties`` must be assignable, and
    an unknown name must raise a ``ValueError`` with the expected message,
    both through attribute assignment and through the constructor.
    """
    model = Classifier()
    assert model.penalty == "l2"

    for name in Classifier._penalties:
        model.penalty = name
        assert model.penalty == name

    unknown = "stuff"
    expected_message = "penalty must be one of %r; got (penalty=%r)" % (
        Classifier._penalties,
        unknown,
    )

    # Unknown name assigned on an existing instance
    with pytest.raises(ValueError) as caught:
        model.penalty = unknown
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # Unknown name handed to the constructor
    with pytest.raises(ValueError) as caught:
        Classifier(penalty=unknown)
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # The attribute must also work through setattr / getattr
    setattr(model, "penalty", "l1")
    assert getattr(model, "penalty") == "l1"
def test_max_iter():
    """Check the default, assignment and validation of ``max_iter``.

    The option must default to ``100``, be stored as an int (a float such as
    ``42.0`` is converted), and reject invalid values with a ``ValueError``
    carrying the expected message, both through attribute assignment and
    through the constructor.
    """
    message_template = (
        "Maximum number of iteration must be positive; got (max_iter=%r)"
    )
    # The two lists differ on purpose in how zero is spelled (0 vs 0.0)
    rejected_by_setter = (-1, 0, complex(1.0, 1.0), "1.0")
    rejected_by_constructor = (-1, 0.0, complex(1.0, 1.0), "1.0")

    model = Classifier()
    assert isinstance(model.max_iter, int)
    assert model.max_iter == 100

    model.max_iter = 42.0
    assert isinstance(model.max_iter, int)
    assert model.max_iter == 42

    # Invalid values assigned on an existing instance
    for bad_value in rejected_by_setter:
        with pytest.raises(ValueError) as caught:
            model.max_iter = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # Invalid values handed to the constructor
    for bad_value in rejected_by_constructor:
        with pytest.raises(ValueError) as caught:
            Classifier(max_iter=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "max_iter", 123)
    assert getattr(model, "max_iter") == 123
def test_estimators_on_simple_data(estimator, solver, fit_intercept):
    """Check that a fitted Classifier performs well on simulated logistic data.

    The model is trained on 75% of a simulated dataset and must reach a ROC
    AUC of at least 0.8 on the remaining 25%. Its coefficients and intercept
    must also be loosely close (``abs=0.5``, ``rel=0.5``) to the ones used
    for the simulation.
    """
    seed = 2

    X, y, true_coef, true_intercept = simulate_true_logistic(
        n_samples=1_000,
        n_features=3,
        random_state=seed,
        fit_intercept=fit_intercept,
        return_coef=True,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        shuffle=True,
        stratify=y,
        random_state=42,
        test_size=0.25,
    )

    # NOTE(review): fit_intercept is only used for the simulation and is not
    # forwarded to the Classifier -- confirm this is intended.
    fitted = Classifier(
        estimator=estimator,
        solver=solver,
        verbose=False,
        random_state=seed,
    ).fit(X_train, y_train)

    # Score of the second column of predict_proba
    scores = fitted.predict_proba(X_test)[:, 1]
    assert roc_auc_score(y_test, scores) >= 0.8

    assert true_coef == pytest.approx(fitted.coef_.ravel(), abs=0.5, rel=0.5)
    assert true_intercept == pytest.approx(fitted.intercept_, abs=0.5, rel=0.5)
def test_loss():
    """Check the default, accepted names and validation of ``loss``.

    Every name listed in ``Classifier._losses`` must be assignable, and an
    unknown name must raise a ``ValueError`` with the expected message, both
    through attribute assignment and through the constructor.
    """
    model = Classifier()
    assert model.loss == "logistic"

    for name in Classifier._losses:
        model.loss = name
        assert model.loss == name

    unknown = "stuff"
    expected_message = "loss must be one of %r; got (loss=%r)" % (
        Classifier._losses,
        unknown,
    )

    # Unknown name assigned on an existing instance
    with pytest.raises(ValueError) as caught:
        model.loss = unknown
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # Unknown name handed to the constructor
    with pytest.raises(ValueError) as caught:
        Classifier(loss=unknown)
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # The attribute must also work through setattr / getattr
    setattr(model, "loss", "logistic")
    assert getattr(model, "loss") == "logistic"
def test_solver():
    """Check the default, accepted names and validation of ``solver``.

    Every name listed in ``Classifier._solvers`` must be assignable, and an
    unknown name must raise a ``ValueError`` with the expected message, both
    through attribute assignment and through the constructor.
    """
    model = Classifier()
    assert model.solver == "cgd"

    for name in Classifier._solvers:
        model.solver = name
        assert model.solver == name

    unknown = "stuff"
    expected_message = "solver must be one of %r; got (solver=%r)" % (
        Classifier._solvers,
        unknown,
    )

    # Unknown name assigned on an existing instance
    with pytest.raises(ValueError) as caught:
        model.solver = unknown
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # Unknown name handed to the constructor
    with pytest.raises(ValueError) as caught:
        Classifier(solver=unknown)
    assert caught.type is ValueError
    assert caught.value.args[0] == expected_message

    # The attribute must also work through setattr / getattr
    setattr(model, "solver", "cgd")
    assert getattr(model, "solver") == "cgd"
def test_block_size():
    """Check the default, assignment and validation of ``block_size``.

    The option must default to ``0.07``, be stored as a float, and reject
    invalid values with a ``ValueError`` carrying the expected message, both
    through attribute assignment and through the constructor.
    """
    message_template = "block_size must be in (0, 1]; got (block_size=%r)"
    rejected_values = (-1, complex(1.0, 1.0), "1.0", 0.0, -1.0, 1.1)

    model = Classifier()
    assert isinstance(model.block_size, float)
    assert model.block_size == 0.07

    model.block_size = 0.123
    assert isinstance(model.block_size, float)
    assert model.block_size == 0.123

    # Invalid values assigned on an existing instance
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            model.block_size = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The same invalid values handed to the constructor
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            Classifier(block_size=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "block_size", 0.42)
    assert getattr(model, "block_size") == 0.42
def test_l1_ratio():
    """Check the default, assignment and validation of ``l1_ratio``.

    The option must default to ``0.5``, be stored as a float, accept the
    boundary values ``0.0`` and ``1.0``, and reject invalid values with a
    ``ValueError`` carrying the expected message, both through attribute
    assignment and through the constructor.
    """
    message_template = "l1_ratio must be in (0, 1]; got (l1_ratio=%r)"
    rejected_values = (-1, complex(1.0, 1.0), "1.0", -1.0, 1.1)

    model = Classifier()
    assert isinstance(model.l1_ratio, float)
    assert model.l1_ratio == 0.5

    # An interior value, then both ends of the range
    for accepted in (0.123, 0.0, 1.0):
        model.l1_ratio = accepted
        assert isinstance(model.l1_ratio, float)
        assert model.l1_ratio == accepted

    # Invalid values assigned on an existing instance
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            model.l1_ratio = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The same invalid values handed to the constructor
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            Classifier(l1_ratio=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "l1_ratio", 0.42)
    assert getattr(model, "l1_ratio") == 0.42
def test_C():
    """Check the default, assignment and validation of ``C``.

    The option must default to ``1.0``, be stored as a float (including when
    the integer ``0`` is assigned), and reject invalid values with a
    ``ValueError`` carrying the expected message, both through attribute
    assignment and through the constructor.
    """
    message_template = "C must be a positive number; got (C=%r)"
    rejected_values = (-1, complex(1.0, 1.0), "1.0")

    model = Classifier()
    assert isinstance(model.C, float)
    assert model.C == 1.0

    # (assigned value, value expected to be stored)
    for assigned, stored in ((42e1, 420.0), (0, 0.0)):
        model.C = assigned
        assert isinstance(model.C, float)
        assert model.C == stored

    # Invalid values assigned on an existing instance
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            model.C = bad_value
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The same invalid values handed to the constructor
    for bad_value in rejected_values:
        with pytest.raises(ValueError) as caught:
            Classifier(C=bad_value)
        assert caught.type is ValueError
        assert caught.value.args[0] == message_template % bad_value

    # The attribute must also work through setattr / getattr
    setattr(model, "C", 3.14)
    assert getattr(model, "C") == 3.14
def test_that_array_conversion_is_ok():
    """Check that Classifier handles DataFrame features and string labels.

    A ``Classifier`` and a scikit-learn ``LogisticRegression`` are fitted on
    the same pandas ``DataFrame`` with labels mapped to the strings ``"neg"``
    and ``"pos"``. Their intercept, coefficients and prediction outputs must
    agree up to an absolute tolerance of ``1e-4``, and their predicted labels
    must agree on every sample.
    """
    import pandas as pd

    n_samples = 20
    X, y = simulate_linear(n_samples)
    X_df = pd.DataFrame(X)

    # Replace the labels by strings to exercise label conversion
    weird = {0: "neg", 1: "pos"}
    y_weird = [weird[yi] for yi in y]

    br = Classifier(tol=1e-17, max_iter=200).fit(X_df, y_weird)
    lr = LogisticRegression(tol=1e-17, max_iter=200).fit(X_df, y_weird)

    assert br.intercept_ == pytest.approx(lr.intercept_, abs=1e-4)
    assert br.coef_ == pytest.approx(lr.coef_, abs=1e-4)

    # And test prediction methods
    assert lr.decision_function(X) == pytest.approx(br.decision_function(X), abs=1e-4)
    assert lr.predict_proba(X) == pytest.approx(br.predict_proba(X), abs=1e-4)
    assert lr.predict_log_proba(X) == pytest.approx(br.predict_log_proba(X), abs=1e-4)
    # Every predicted label must match: ``.any()`` would already pass with a
    # single matching prediction
    assert (lr.predict(X) == br.predict(X)).all()
def test_elasticnet_l1_ridge_are_consistent(fit_intercept, C, solver):
    """Check that elastic-net reduces to the pure l2 and pure l1 penalties.

    With ``l1_ratio=0.0`` an elastic-net model must give the same ``coef_``
    and ``intercept_`` as ``penalty="l2"``; with ``l1_ratio=1.0`` it must
    give the same ones as ``penalty="l1"``. Comparisons use an absolute
    tolerance of ``1e-7``.
    """
    tolerance = 1e-7

    X, y = simulate_true_logistic(
        n_samples=128,
        n_features=5,
        fit_intercept=fit_intercept,
    )

    # Two specific combinations run with adjusted settings
    needs_small_step = solver == "cgd" and C == 0.001 and fit_intercept
    needs_more_iter = solver == "saga" and C == 1000.0 and fit_intercept

    common = {
        "tol": 1e-10,
        "max_iter": 220 if needs_more_iter else 200,
        "verbose": False,
        "step_size": 0.5 if needs_small_step else 7.0,
        "fit_intercept": fit_intercept,
        "random_state": 42,
    }

    # Test that elasticnet with l1_ratio=0.0 is the same as penalty="l2"
    enet = Classifier(penalty="elasticnet", C=C, l1_ratio=0.0, solver=solver, **common)
    ridge = Classifier(penalty="l2", C=C, solver=solver, **common)
    enet.fit(X, y)
    ridge.fit(X, y)
    assert enet.coef_ == pytest.approx(ridge.coef_, abs=tolerance)
    assert enet.intercept_ == pytest.approx(ridge.intercept_, abs=tolerance)

    # Test that elasticnet with l1_ratio=1.0 is the same as penalty="l1"
    enet = Classifier(penalty="elasticnet", C=C, l1_ratio=1.0, solver=solver, **common)
    lasso = Classifier(penalty="l1", C=C, l1_ratio=0.0, solver=solver, **common)
    enet.fit(X, y)
    lasso.fit(X, y)
    assert enet.intercept_ == pytest.approx(lasso.intercept_, abs=tolerance)
    assert enet.coef_ == pytest.approx(lasso.coef_, abs=tolerance)
def test_fit_same_sklearn_circles(fit_intercept, penalty, C, l1_ratio, solver):
    """Compare Classifier with scikit-learn on the "circles" dataset.

    This test checks on many combinations that Classifier gets the same
    ``coef_`` and ``intercept_`` as scikit-learn's ``LogisticRegression``
    (run with the "saga" solver) on data produced by ``make_circles``,
    up to an absolute tolerance of ``1e-4``.
    """
    seed = 42
    tolerance = 1e-4

    X, y = make_circles(n_samples=150, noise=0.2, random_state=seed)

    shared = {
        "tol": 1e-15,
        "max_iter": 300,
        "verbose": False,
        "fit_intercept": fit_intercept,
        "random_state": 42,
    }

    if penalty not in ("none", "l2", "l1", "elasticnet"):
        raise ValueError("Weird penalty %r" % penalty)

    # Parameters that a penalty does not use are only tested at one value
    if penalty == "none" and (C != 1.0 or l1_ratio != 0.5):
        return
    if penalty in ("l2", "l1") and l1_ratio != 0.5:
        return

    # Only pass to scikit-learn the options relevant to the penalty
    if penalty == "none":
        penalty_options = {}
    elif penalty == "elasticnet":
        penalty_options = {"C": C, "l1_ratio": l1_ratio}
    else:
        penalty_options = {"C": C}
    reference = LogisticRegression(
        penalty=penalty, solver="saga", **penalty_options, **shared
    )

    # step_size is added after the scikit-learn model is built, so that only
    # the Classifier receives it
    if solver in ("svrg", "saga"):
        shared["step_size"] = 3.0

    reference.fit(X, y)

    candidate = Classifier(
        penalty=penalty, C=C, l1_ratio=l1_ratio, solver=solver, **shared
    )
    candidate.fit(X, y)

    # The intercept is not compared for sparse penalties with an intercept
    # and a small C
    skip_intercept = penalty in ("l1", "elasticnet") and fit_intercept and C <= 1e-1
    if not skip_intercept:
        assert reference.intercept_ == pytest.approx(candidate.intercept_, abs=tolerance)

    assert reference.coef_ == pytest.approx(candidate.coef_, abs=tolerance)
def test_fit_same_sklearn_logistic(fit_intercept, penalty, C, l1_ratio, solver):
    """Compare Classifier with scikit-learn on simulated logistic data.

    This is a test that checks on many combinations that Classifier gets the
    same coef_ and intercept_ as scikit-learn on simulated data. The
    prediction methods are compared as well, and the predicted labels must
    agree on every sample.
    """
    n_samples = 128
    n_features = 5
    tol = 1e-10
    max_iter = 300
    verbose = False

    X, y = simulate_true_logistic(
        n_samples=n_samples,
        n_features=n_features,
        fit_intercept=fit_intercept,
    )

    args = {
        "tol": tol,
        "max_iter": max_iter,
        "verbose": verbose,
        "fit_intercept": fit_intercept,
        "random_state": 42,
    }

    if penalty == "none":
        # A single test is required for penalty="none"
        if C != 1.0 or l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, solver="saga", **args)
    elif penalty == "l2":
        if l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args)
    elif penalty == "l1":
        if l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args)
    elif penalty == "elasticnet":
        clf_scikit = LogisticRegression(
            penalty=penalty, C=C, solver="saga", l1_ratio=l1_ratio, **args
        )
    else:
        raise ValueError("Weird penalty %r" % penalty)

    # The settings below are changed after the scikit-learn model is built,
    # so they only affect the Classifier
    if solver in ["svrg", "saga", "gd"]:
        abs_approx, rel_approx = 1e-2, 1e-2
        args["step_size"] = 2.5
        if solver == "saga":
            args["max_iter"] = 400
    else:
        abs_approx, rel_approx = 1e-6, 1e-6

    clf_scikit.fit(X, y)
    # We compare with saga since it supports all penalties
    # clf_scikit = LogisticRegression(solver="saga", **args).fit(X, y)

    clf_linlearn = Classifier(
        penalty=penalty, C=C, l1_ratio=l1_ratio, solver=solver, **args
    )
    clf_linlearn.fit(X, y)

    # For some weird reason scikit's intercept_ does not match for "l1" and
    # "elasticnet" with intercept and for small C
    if not (penalty in ["l1", "elasticnet"] and fit_intercept and C < 1e-1):
        # Test the intercept_
        assert clf_scikit.intercept_ == pytest.approx(
            clf_linlearn.intercept_, abs=abs_approx, rel=rel_approx
        )
        # And test prediction methods
        assert clf_scikit.decision_function(X) == pytest.approx(
            clf_linlearn.decision_function(X), abs=abs_approx, rel=rel_approx
        )
        assert clf_scikit.predict_proba(X) == pytest.approx(
            clf_linlearn.predict_proba(X), abs=abs_approx, rel=rel_approx
        )
        assert clf_scikit.predict_log_proba(X) == pytest.approx(
            clf_linlearn.predict_log_proba(X), abs=abs_approx, rel=rel_approx
        )
        # Every predicted label must match: ``.any()`` would already pass
        # with a single matching prediction
        assert (clf_scikit.predict(X) == clf_linlearn.predict(X)).all()
        assert clf_scikit.score(X, y) == clf_linlearn.score(X, y)

    # And always test the coef_
    assert clf_scikit.coef_ == pytest.approx(
        clf_linlearn.coef_, abs=abs_approx, rel=rel_approx
    )
def test_keyword_args_only():
    """Check that Classifier refuses positional constructor arguments.

    Passing a positional argument must raise a ``TypeError`` whose message
    reports that ``__init__()`` takes a single positional argument.
    """
    with pytest.raises(TypeError) as exc_info:
        _ = Classifier("l2")
    assert exc_info.type is TypeError
    # Since Python 3.10 the message starts with the qualified name
    # ("Classifier.__init__() takes ..."), while older versions only print
    # "__init__() takes ...": compare the common suffix so that the test
    # passes on both.
    match = "__init__() takes 1 positional argument but 2 were given"
    assert exc_info.value.args[0].endswith(match)
def run_algorithm(
    data,
    algo,
    rep,
    col_try,
    col_algo,
    col_metric,
    col_val,
    col_time,
    col_sc_prods,
):
    """Fit one algorithm for one repetition and append its metric histories.

    Parameters
    ----------
    data
        Sequence unpacked as ``(X_train, X_test, y_train, y_test)``.
    algo
        Object exposing ``name``, ``solver`` and ``estimator`` attributes.
    rep
        Repetition identifier; passed to ``announce`` and appended to
        ``col_try``.
    col_try, col_algo, col_metric, col_val, col_time, col_sc_prods
        Output columns; one value is appended to each of them for every
        (metric, history entry) pair.

    Notes
    -----
    ``finetuned_params``, ``cgd_IS``, ``tol``, ``max_iter``, ``loss``,
    ``fit_intercept``, ``step_size``, ``penalty``, ``l1_ratio`` and
    ``lamda`` are not defined here and are resolved from the enclosing
    scope.
    """
    X_train, X_test, y_train, y_test = data
    n_train = len(y_train)
    announce(rep, algo.name, "running")

    tuned = finetuned_params[algo.name]
    # Settings used whether or not fine-tuned parameters are available
    shared = {
        "tol": tol,
        "max_iter": max_iter,
        "loss": loss,
        "fit_intercept": fit_intercept,
        "step_size": step_size,
        "penalty": penalty,
        "l1_ratio": l1_ratio,
        "C": 1 / (n_train * lamda),
    }
    if tuned is not None:
        # Fine-tuned parameters complete the shared settings
        clf = Classifier(**shared, **tuned)
    else:
        print("cgd_IS is %r" % cgd_IS)
        clf = Classifier(
            solver=algo.solver,
            estimator=algo.estimator,
            cgd_IS=cgd_IS,
            **shared,
        )

    clf.fit(X_train, y_train, dummy_first_step=True)
    announce(rep, algo.name, "fitted")

    # The histories are computed in the order the metrics are read below
    for features, labels, kind in (
        (X_train, y_train, "objective"),
        (X_test, y_test, "objective"),
        (X_train, y_train, "misclassif_rate"),
        (X_test, y_test, "misclassif_rate"),
    ):
        clf.compute_objective_history(features, labels, metric=kind)
    announce(rep, algo.name, "computed history")

    # The first history record is skipped. Of the remaining ones, the first
    # gives the number of entries (``cursor``) and the next four are read as
    # the metrics -- presumably in the order they were computed above; verify
    # against the history implementation.
    records = clf.history_.records[1:]
    metric_names = ("train_loss", "test_loss", "misclassif_train", "misclassif_test")
    for offset, metric in enumerate(metric_names, start=1):
        for step in range(records[0].cursor):
            col_try.append(rep)
            col_algo.append(algo.name)
            col_metric.append(metric)
            col_val.append(records[offset].record[step])
            # The entry index is stored as the "time"
            col_time.append(step)
            col_sc_prods.append(clf.history_.record_nm("sc_prods").record[step])
def test_fit_same_sklearn_simulated_multiclass(fit_intercept, penalty, C, l1_ratio, solver):
    """Compare Classifier with scikit-learn on simulated multiclass data.

    This is a test that checks on many combinations that Classifier gets the
    same coef_ and intercept_ as scikit-learn on simulated data. The
    prediction methods are compared as well, and the predicted labels must
    agree on every sample.
    """
    tol = 1e-10
    max_iter = 200  # so many iterations needed to reach necessary precision ...
    verbose = False
    step_size = 1.0

    n_samples = 128
    n_features = 5
    n_classes = 3
    # NOTE(review): ``random_state`` is not defined in this function;
    # presumably it is a module-level name -- confirm.
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0,
        random_state=random_state,
    )
    # X, y = load_iris(return_X_y=True)

    args = {
        "tol": tol,
        "max_iter": max_iter,
        "verbose": verbose,
        "fit_intercept": fit_intercept,
        "random_state": 42,
        "multi_class": "multinomial",
    }

    # if solver in ["svrg", "saga", "gd"] and fit_intercept:
    #     abs_approx, rel_approx = 1e-4, 1e-4
    # else:
    #     abs_approx, rel_approx = 1e-6, 1e-6
    abs_approx, rel_approx = 1e-4, 1e-4

    #######################################
    # The overrides below happen before the scikit-learn model is built, so
    # the changed ``max_iter`` applies to both models
    if penalty == "l1" and C == 1.0 and fit_intercept:
        args["max_iter"] = 300
    if penalty == "elasticnet" and C == 1.0:
        step_size = 1.0
        args["max_iter"] = 600
        abs_approx, rel_approx = 1e-2, 1e-2
    #######################################
    if penalty == "elasticnet" and C == 1.0 and l1_ratio == 0.1:
        step_size = 1.0
        args["max_iter"] = 900
        abs_approx, rel_approx = 1e-2, 1e-2
    #######################################
    if penalty == "elasticnet" and C == 1.0 and l1_ratio == 0.5:
        args["max_iter"] = 1000
        abs_approx, rel_approx = 1e-2, 1e-2
    #######################################

    if penalty == "none":
        # A single test is required for penalty="none"
        if C != 1.0 or l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, solver="saga", **args)
    elif penalty == "l2":
        if l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args)
    elif penalty == "l1":
        if l1_ratio != 0.5:
            return
        clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args)
    elif penalty == "elasticnet":
        clf_scikit = LogisticRegression(
            penalty=penalty, C=C, solver="saga", l1_ratio=l1_ratio, **args
        )
    else:
        raise ValueError("Weird penalty %r" % penalty)

    if solver == "gd":
        step_size = 1.5
    elif solver in ["svrg", "saga"]:
        step_size = 5.0

    clf_scikit.fit(X, y)
    # We compare with saga since it supports all penalties
    # clf_scikit = LogisticRegression(solver="saga", **args).fit(X, y)

    # "multi_class" is only passed to scikit-learn; the Classifier is given
    # loss="multilogistic" instead
    args.pop("multi_class")
    clf_linlearn = Classifier(
        penalty=penalty,
        loss="multilogistic",
        step_size=step_size,
        C=C,
        l1_ratio=l1_ratio,
        solver=solver,
        **args
    )
    clf_linlearn.fit(X, y)

    # For some weird reason scikit's intercept_ does not match for "l1" and
    # "elasticnet" with intercept and for small C
    if not (penalty in ["l1", "elasticnet"] and fit_intercept and C < 1e-1):
        # Test the intercept_
        assert clf_scikit.intercept_ == pytest.approx(
            clf_linlearn.intercept_, abs=abs_approx, rel=rel_approx
        )
        # And test prediction methods
        assert clf_scikit.decision_function(X) == pytest.approx(
            clf_linlearn.decision_function(X), abs=abs_approx, rel=rel_approx
        )
        assert clf_scikit.predict_proba(X) == pytest.approx(
            clf_linlearn.predict_proba(X), abs=abs_approx, rel=rel_approx
        )
        assert clf_scikit.predict_log_proba(X) == pytest.approx(
            clf_linlearn.predict_log_proba(X), abs=abs_approx, rel=rel_approx
        )
        # Every predicted label must match: ``.any()`` would already pass
        # with a single matching prediction
        assert (clf_scikit.predict(X) == clf_linlearn.predict(X)).all()
        assert clf_scikit.score(X, y) == clf_linlearn.score(X, y)

    # And always test the coef_
    assert clf_scikit.coef_ == pytest.approx(
        clf_linlearn.coef_, abs=abs_approx, rel=rel_approx
    )


# @pytest.mark.parametrize("fit_intercept", (False, True))
# @pytest.mark.parametrize("penalty", penalties[1:])  # don't test iris with none penalty
# @pytest.mark.parametrize("C", grid_C)
# @pytest.mark.parametrize("l1_ratio", grid_l1_ratio)
# @pytest.mark.parametrize("solver", solvers)
# def test_fit_same_sklearn_iris(
#     fit_intercept, penalty, C, l1_ratio, solver
# ):
#     """
#     This is a test that checks on many combinations that Classifier gets the
#     same coef_ and
intercept_ as scikit-learn on the iris dataset # """ # tol = 1e-10 # max_iter = 400 # so many iterations needed to reach necessary precision ... # verbose = False # step_size = 1.0 # # X, y = load_iris(return_X_y=True) # mean = X.mean(axis=0) # std = X.std(axis=0) # X = (X - mean) / std # # std_scaler = StandardScaler() # # X = std_scaler.fit_transform(X) # # args = { # "tol": tol, # "max_iter": max_iter, # "verbose": verbose, # "fit_intercept": fit_intercept, # "random_state": 42, # "multi_class": "multinomial", # } # # # if solver in ["svrg", "saga", "gd"] and fit_intercept: # # abs_approx, rel_approx = 1e-4, 1e-4 # # else: # # abs_approx, rel_approx = 1e-6, 1e-6 # abs_approx, rel_approx = 1e-3, 1e-3 ####################################### # # if penalty == "elasticnet" and l1_ratio == 0.5: # step_size = 1.5 # if (penalty == "l1") or (penalty == "elasticnet" and l1_ratio == 0.9): # step_size = 2.0 # args["max_iter"] = 1200 # if penalty == "l1" and fit_intercept: # step_size = 3.5 # args["max_iter"] = 1500 # abs_approx, rel_approx = 1e-2, 1e-2 ####################################### # # if penalty == "none": # # A single test is required for penalty="none" # if C != 1.0 or l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, solver="saga", **args) # elif penalty == "l2": # if l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args) # elif penalty == "l1": # if l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args) # elif penalty == "elasticnet": # clf_scikit = LogisticRegression( # penalty=penalty, C=C, solver="saga", l1_ratio=l1_ratio, **args # ) # else: # raise ValueError("Weird penalty %r" % penalty) # # if solver == "gd": # step_size *= 2.6 # elif solver in ["svrg", "saga"]: # step_size *= 4.0 # # clf_scikit.fit(X, y) # # We compare with saga since it supports all penalties # # clf_scikit = LogisticRegression(solver="saga", **args).fit(X, 
y) # args.pop("multi_class") # clf_linlearn = Classifier( # penalty=penalty, # loss="multilogistic", # C=C, # step_size=step_size, # l1_ratio=l1_ratio, # solver=solver, # **args # ) # clf_linlearn.fit(X, y) # # # And always test the coef_ # # assert clf_scikit.coef_ == pytest.approx( # clf_linlearn.coef_, abs=abs_approx, rel=rel_approx # ) # # # For some weird reason scikit's intercept_ does not match for "l1" and # # "elasticnet" with intercept and for small C # if not (penalty in ["l1", "elasticnet"] and fit_intercept and C < 1e-1): # # Test the intercept_ # assert clf_scikit.intercept_ == pytest.approx( # clf_linlearn.intercept_, abs=abs_approx, rel=rel_approx # ) # # And test prediction methods # assert clf_scikit.decision_function(X) == pytest.approx( # clf_linlearn.decision_function(X), abs=abs_approx, rel=rel_approx # ) # assert clf_scikit.predict_proba(X) == pytest.approx( # clf_linlearn.predict_proba(X), abs=abs_approx, rel=rel_approx # ) # assert clf_scikit.predict_log_proba(X) == pytest.approx( # clf_linlearn.predict_log_proba(X), abs=abs_approx, rel=rel_approx # ) # assert (clf_scikit.predict(X) == clf_linlearn.predict(X)).any() # assert clf_scikit.score(X, y) == clf_linlearn.score(X, y) # # # @pytest.mark.parametrize("fit_intercept", (False, True)) # @pytest.mark.parametrize("penalty", penalties[1:]) # don't test the wine dataset with no penalty # @pytest.mark.parametrize("C", grid_C) # @pytest.mark.parametrize("l1_ratio", grid_l1_ratio) # @pytest.mark.parametrize("solver", solvers) # def test_fit_same_sklearn_wine( # fit_intercept, penalty, C, l1_ratio, solver # ): # """ # This is a test that checks on many combinations that Classifier gets the # same coef_ and intercept_ as scikit-learn on the iris dataset # """ # tol = 1e-10 # max_iter = 400 # so many iterations needed to reach necessary precision ... 
# verbose = False # step_size = 1.0 # # X, y = load_wine(return_X_y=True) # mean = X.mean(axis=0) # std = X.std(axis=0) # X = (X - mean) / std # # std_scaler = StandardScaler() # # X = std_scaler.fit_transform(X) # # args = { # "tol": tol, # "max_iter": max_iter, # "verbose": verbose, # "fit_intercept": fit_intercept, # "random_state": 42, # "multi_class": "multinomial", # } # # # if solver in ["svrg", "saga", "gd"] and fit_intercept: # # abs_approx, rel_approx = 1e-4, 1e-4 # # else: # # abs_approx, rel_approx = 1e-6, 1e-6 # abs_approx, rel_approx = 1e-3, 1e-3 # # if penalty == "l2" and C == 1.0 and fit_intercept: # step_size = 2.0 # if penalty == "l1" and C == 1.0: # step_size = 2.0 # args["max_iter"] = 900 # if penalty == "elasticnet" and C == 1.0: # step_size = 2.0 # args["max_iter"] = 600 # if solver == "gd" and l1_ratio == 0.9: # abs_approx, rel_approx = 1e-2, 1e-2 # # if penalty == "none": # # A single test is required for penalty="none" # if C != 1.0 or l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, solver="saga", **args) # elif penalty == "l2": # if l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args) # elif penalty == "l1": # if l1_ratio != 0.5: # return # clf_scikit = LogisticRegression(penalty=penalty, C=C, solver="saga", **args) # elif penalty == "elasticnet": # clf_scikit = LogisticRegression( # penalty=penalty, C=C, solver="saga", l1_ratio=l1_ratio, **args # ) # else: # raise ValueError("Weird penalty %r" % penalty) # # if solver == "gd": # step_size *= 5.0 # elif solver in ["svrg", "saga"]: # step_size *= 15.5 # # clf_scikit.fit(X, y) # # We compare with saga since it supports all penalties # # clf_scikit = LogisticRegression(solver="saga", **args).fit(X, y) # args.pop("multi_class") # clf_linlearn = Classifier( # penalty=penalty, # loss="multilogistic", # step_size=step_size, # C=C, # l1_ratio=l1_ratio, # solver=solver, # **args # ) # clf_linlearn.fit(X, y) # # # And 
always test the coef_ # # assert clf_scikit.coef_ == pytest.approx( # clf_linlearn.coef_, abs=abs_approx, rel=rel_approx # ) # # # For some weird reason scikit's intercept_ does not match for "l1" and # # "elasticnet" with intercept and for small C # if not (penalty in ["l1", "elasticnet"] and fit_intercept and C < 1e-1): # # Test the intercept_ # assert clf_scikit.intercept_ == pytest.approx( # clf_linlearn.intercept_, abs=abs_approx, rel=rel_approx # ) # # And test prediction methods # assert clf_scikit.decision_function(X) == pytest.approx( # clf_linlearn.decision_function(X), abs=abs_approx, rel=rel_approx # ) # assert clf_scikit.predict_proba(X) == pytest.approx( # clf_linlearn.predict_proba(X), abs=abs_approx, rel=rel_approx # ) # assert clf_scikit.predict_log_proba(X) == pytest.approx( # clf_linlearn.predict_log_proba(X), abs=abs_approx, rel=rel_approx # ) # assert (clf_scikit.predict(X) == clf_linlearn.predict(X)).any() # assert clf_scikit.score(X, y) == clf_linlearn.score(X, y)