def make_regressors(subset=None):
    """Build a pipeline whose final step is grid-searched over regressors.

    Parameters
    ----------
    subset : list of str or None
        Keys of the candidate regressors to include ('gbrt', 'lasso').
        When None, every available regressor is used.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline with a single 'finmodel' step and an attached search grid.
    """
    available_regressors = {
        'gbrt': set_grid(
            GradientBoostingRegressor(),
            n_estimators=[2 ** i for i in range(1, 11)],
            learning_rate=[0.1, 0.01, 0.001],
        ),
        'lasso': set_grid(
            Lasso(),
            alpha=np.exp(np.linspace(-8, 8)),
        ),
    }
    if subset is None:
        subset = list(available_regressors)
    chosen = [available_regressors[name] for name in subset]
    # DummyRegressor is only a placeholder: set_grid swaps in each candidate.
    pipe = Pipeline([('finmodel', DummyRegressor())])
    return set_grid(pipe, finmodel=chosen)
def test_build_param_grid_set_estimator():
    """Grids for alternative 'clf' estimators are merged per shared params."""
    svc_linear = SVC()
    logreg = LogisticRegression()
    svc_poly = SVC()
    sgd = SGDClassifier()

    estimator = set_grid(
        Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                  ('clf', None)]),
        clf=[set_grid(svc_linear, kernel=['linear']),
             logreg,
             set_grid(svc_poly, kernel=['poly'], degree=[2, 3]),
             sgd])

    expected = [
        {'clf': [svc_linear],
         'clf__kernel': ['linear'],
         'sel__k': [2, 3]},
        {'clf': [svc_poly],
         'clf__kernel': ['poly'],
         'clf__degree': [2, 3],
         'sel__k': [2, 3]},
        # Estimators with no grid of their own are grouped together.
        {'clf': [logreg, sgd],
         'sel__k': [2, 3]},
    ]
    assert build_param_grid(estimator) == expected
def test_rnn(datafnc):
    """Grid-search a CNN1DClassifier on the dataset produced by *datafnc*.

    Parameters
    ----------
    datafnc : callable
        Zero-argument callable returning an (X, y) dataset.
    """
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from searchgrid import set_grid, build_param_grid

    X, y = datafnc()
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # NOTE(review): the original built a GRUClassifier pipeline here and
    # immediately overwrote it with the CNN pipeline below; that dead
    # assignment (and the then-unused MLPClassifier / StandardScaler /
    # PadSubsequence imports) have been removed. Behavior is unchanged.
    estimator = make_pipeline(
        set_grid(CNN1DClassifier(epochs=64),
                 alpha=[0.01],
                 n_layers=[1],
                 n_neurons=[32],
                 dropout=[0.2, 0.3, 0.4]))

    model = GridSearchCV(estimator=estimator,
                         param_grid=build_param_grid(estimator),
                         verbose=1000,
                         cv=3,
                         n_jobs=1)
    model.fit(X_train, y_train)
    print(datafnc.__name__)
    print(model.score(X_test, y_test))
def make_dummy_regressor(subset=None):
    """Build a pipeline whose 'model' step is grid-searched over dummies.

    Parameters
    ----------
    subset : list of str or None
        Keys of the candidates to include; all of them when None.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline with a 'model' step and an attached search grid.
    """
    available_regressors = {
        'dummy': set_grid(DummyRegressor(), strategy=['mean', 'median']),
    }
    if subset is None:
        subset = list(available_regressors)
    selected = [available_regressors[key] for key in subset]
    # The Lasso is only a placeholder: set_grid substitutes each candidate.
    pipe = set_grid(Pipeline([('model', Lasso())]), model=selected)
    return pipe
def grid_regressors(subset=None):
    """Create regressors with parameter ranges for use with GridSearchCV.

    Parameters
    ----------
    subset : array-like or None
        Subset of models ('gbrt', 'svr', 'tree', 'lasso') to use for grid
        search. When None, all available models are returned.

    Returns
    -------
    regressors : list
        List of regressors with attached grids.
    """
    catalogue = {
        'gbrt': set_grid(
            GradientBoostingRegressor(),
            n_estimators=[2 ** i for i in range(1, 11)],
            learning_rate=[0.1, 0.01, 0.001],
        ),
        'svr': set_grid(
            SVR(),
            C=np.logspace(-5, 5, 20),
            epsilon=[0.1, 0.01, 0.001],
            gamma=np.logspace(-5, 5, 20),
        ),
        'tree': set_grid(
            DecisionTreeRegressor(),
            min_samples_split=np.logspace(-5, 0, 20),
            max_depth=list(range(1, 20)),
        ),
        'lasso': set_grid(
            Lasso(),
            alpha=np.exp(np.linspace(-8, 8)),
        ),
    }
    if subset is None:
        subset = list(catalogue)
    return [catalogue[name] for name in subset]
def test_make_grid_search():
    """make_grid_search handles a bare estimator, a gridded one, and a list."""
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])

    searches = [
        (make_grid_search(lr, cv=5), 1),          # empty grid -> one candidate
        (make_grid_search(svc, cv=5), 2),
        (make_grid_search([lr, svc], cv=5), 3),
    ]
    for gs, n_results in searches:
        gs.fit(X, y)
        assert gs.cv == 5
        assert len(gs.cv_results_['params']) == n_results

    # In the list case the estimators are exposed under the 'root' parameter.
    gs3 = searches[2][0]
    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
def test_dnn_v_dnn(datafnc):
    """Compare a grid-searched PMLPClassifier against sklearn's MLPClassifier.

    Parameters
    ----------
    datafnc : callable
        Zero-argument callable returning an (X, y) dataset.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from searchgrid import set_grid, build_param_grid

    X, y = datafnc()
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    pmlp_pipe = make_pipeline(
        StandardScaler(),
        set_grid(PMLPClassifier(),
                 epochs=[2 ** i for i in range(1, 8)],
                 n_layers=list(range(1, 4)),
                 n_neurons=[2 ** i for i in range(1, 8)],
                 alpha=[1e-4, 1e-3, 1e-2]))
    model = GridSearchCV(estimator=pmlp_pipe,
                         param_grid=build_param_grid(pmlp_pipe),
                         verbose=1000,
                         cv=3,
                         n_jobs=2)
    # Baseline: plain sklearn MLP searched only over max_iter.
    mlp = GridSearchCV(
        estimator=make_pipeline(StandardScaler(), MLPClassifier()),
        param_grid={'mlpclassifier__max_iter': [2 ** i for i in range(1, 8)]},
        verbose=1000,
        cv=3)

    model.fit(X_train, y_train)
    mlp.fit(X_train, y_train)
    print(datafnc.__name__)
    print(model.score(X_test, y_test))
    print(mlp.score(X_test, y_test))
# Per-column feature extraction: numeric columns go through `number`,
# categoricals through `category` (column indices from the raw table).
features = make_union(
    number(1),  # Pclass
    category(3),  # Gender
    number(4),  # Age
    number(5),  # SibSp
    category(6),  # Parch
    number(8),  # Fare
    category(10),  # Embarked
)
# Base pipeline; the 'model' step is a placeholder replaced via set_grid below.
estimator = Pipeline([('features', features), ('scaler', StandardScaler()), ('model', LinearSVC())])
# NOTE(review): `models` is built but not referenced in the visible code —
# presumably intended for the set_grid call below (which currently wires in
# only GradientBoostingClassifier); confirm against the rest of the file.
models = [
    set_grid(DecisionTreeClassifier(),
             min_samples_split=[2**-i for i in range(1, 8)],
             max_depth=list(range(1, 21))),
    set_grid(LinearSVC(), C=np.logspace(-6, 6, num=20)),
]
# Attach the search space over the 'model' and 'scaler' steps.
estimator = set_grid(estimator, model=[
    set_grid(GradientBoostingClassifier(),
             n_estimators=[2**i for i in range(1, 11)],
             learning_rate=np.logspace(-4, 0, num=10)),
], scaler=[StandardScaler()])
param_grid = build_param_grid(estimator)
# The GridSearchCV call continues beyond this chunk.
gsearch = GridSearchCV(estimator=estimator,
import pytest
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.datasets import load_iris

from searchgrid import set_grid, build_param_grid, make_grid_search


@pytest.mark.parametrize(('estimator', 'param_grid'), [
    # Single parameter on a bare estimator.
    (set_grid(SVC(), C=[1, 2]),
     {'C': [1, 2]}),
    # Two parameters on a bare estimator.
    (set_grid(SVC(), C=[1, 2], gamma=[1, 2]),
     {'C': [1, 2], 'gamma': [1, 2]}),
    # Parameters get the step-name prefix when wrapped in a pipeline.
    (make_pipeline(set_grid(SVC(), C=[1, 2], gamma=[1, 2])),
     {'svc__C': [1, 2], 'svc__gamma': [1, 2]}),
])
def test_build_param_grid(estimator, param_grid):
    """set_grid specs are flattened into GridSearchCV-style param grids."""
    assert build_param_grid(estimator) == param_grid

# Candidate xfail cases kept for reference (list-of-dicts grid specs):
# pytest.mark.xfail(
#     (set_grid(SVC(), [{'kernel': ['linear']},
#                       {'kernel': 'rbf', 'gamma': [1, 2]}]),
#      [{'kernel': ['linear']}, {'kernel': 'rbf', 'gamma': [1, 2]}])),
# pytest.mark.xfail(