import ast

import pandas as pd
from sklearn import metrics

from mlens.ensemble import SuperLearner
from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator


def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    """Tune base-layer model hyperparameters; the current evaluation metric is AUC."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, estimators=base_learners, param_dicts=param_dicts,
            n_iter=n_iterations)
    df_params = pd.DataFrame(evl.results)
    return df_params
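# A minimal usage sketch with synthetic data (the learner names and search
# distributions below are hypothetical; adjust them to your task). Writing the
# results to 'params_base.csv' matches what layer_hyperparam_tuning below
# reads back.
from scipy.stats import randint, uniform
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


def _demo_base_tuning():
    X_demo, y_demo = make_classification(n_samples=200, random_state=0)
    X_demo, y_demo = pd.DataFrame(X_demo), pd.Series(y_demo)
    base_learners = [('rf', RandomForestClassifier(n_estimators=50)),
                     ('lr', LogisticRegression(max_iter=1000))]
    param_dicts = {'rf': {'max_depth': randint(2, 10)},
                   'lr': {'C': uniform(0.01, 5)}}
    df_params = base_hyperparam_tuning(X_demo, y_demo, base_learners,
                                       param_dicts, n_iterations=10)
    df_params.to_csv('params_base.csv')
    return base_learners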
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    """Tune intermediate-layer hyperparameters; learners must be added in order."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)

    # Load the tuned base-layer parameters and apply them to each matching learner.
    params_pre = pd.read_csv(pre_params)
    params_pre.set_index(['Unnamed: 0'], inplace=True)
    for case_name, params in params_pre["params"].items():
        params = ast.literal_eval(params)  # safer than eval() for parsing the stored dict
        for est_name, est in pre_layer_learners:
            if est_name == case_name:
                est.set_params(**params)

    # Stack the tuned base layer and use it as a preprocessing step when
    # evaluating the local-layer learners.
    in_layer = SuperLearner(folds=10, backend='multiprocessing',
                            model_selection=True)
    in_layer.add(pre_layer_learners, proba=True)
    preprocess = [in_layer]

    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, local_layer_learners, param_dicts=param_dicts_layer,
            preprocessing={'meta': preprocess}, n_iter=n_iterations)
    df_params_layer = pd.DataFrame(evl.results)
    return in_layer, df_params_layer
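# Continuing the sketch above (names hypothetical): once 'params_base.csv'
# exists, a meta learner can be tuned on top of the stacked base layer.
def _demo_layer_tuning(X_demo, y_demo, base_learners):
    meta_learners = [('meta_lr', LogisticRegression(max_iter=1000))]
    param_dicts_layer = {'meta_lr': {'C': uniform(0.01, 5)}}
    in_layer, df_layer = layer_hyperparam_tuning(
        X_demo, y_demo, base_learners, meta_learners,
        param_dicts_layer, n_iterations=10)
    return in_layer, df_layer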
def test_fit_score():
    """[Parallel | Evaluation] Test fit-score function."""
    out = fit_score(case='test',
                    tr_list=[],
                    est_name='ols',
                    est=OLS(),
                    params=(0, {'offset': 2}),
                    x=X, y=y,
                    idx=((0, 5), (5, 10)),
                    scorer=make_scorer(mape, greater_is_better=False),
                    error_score=None)

    assert out[0] == 'test'
    assert out[1] == 'ols'
    assert out[2] == 0

    np.testing.assert_almost_equal(out[3], -1.5499999999999992, 5)
    np.testing.assert_almost_equal(out[4], -2.0749999999999993, 5)
import pandas as pd
from sklearn.metrics import mean_absolute_error

from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator
from mlens.preprocessing import EnsembleTransformer


def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    """Evaluate candidate meta learners on stacked base-learner predictions."""
    in_layer = EnsembleTransformer()

    print("adding base learners to transformer")
    in_layer.add('stack', base_learners)
    preprocess = [in_layer]

    print("creating scorer")
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)

    evl = Evaluator(scorer, cv=4, verbose=1)

    print("fitting evaluator")
    evl.fit(
        x_train.values,
        y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': preprocess},
        n_iter=40  # bump this up to do a larger grid search
    )

    table = pd.DataFrame(evl.summary)
    table.to_html('iteration5.html')
    table.to_csv('iteration5.csv', index=False, header=False, sep='\t')
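# A minimal sketch of calling the routine above (hypothetical learners and
# distributions; x_train and y_train are assumed to be pandas objects):
def _demo_second_layer(x_train, y_train):
    from scipy.stats import uniform
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import Lasso, Ridge

    base_learners = [('rf', RandomForestRegressor(n_estimators=50)),
                     ('rdg', Ridge())]
    meta_learners = [('las', Lasso(max_iter=4000))]
    param_dicts = {'las': {'alpha': uniform(0.01, 5)}}
    evaluateSecondLayer(base_learners, x_train, y_train,
                        meta_learners, param_dicts)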
# The following example evaluates a `Naive Bayes`_ estimator and a
# `K-Nearest-Neighbor`_ estimator under three different preprocessing scenarios:
# no preprocessing, standard scaling, and subset selection.
# In the latter case, preprocessing is constituted by selecting a subset of
# features.
#
# The scoring function
# ^^^^^^^^^^^^^^^^^^^^
#
# .. currentmodule:: mlens.metrics
#
# An important note is that the scoring function must be wrapped by
# :func:`make_scorer` to ensure all scoring functions behave similarly,
# regardless of whether they measure accuracy or errors. To wrap a function,
# simply do:

from sklearn.metrics import accuracy_score
from mlens.metrics import make_scorer

accuracy_scorer = make_scorer(accuracy_score, greater_is_better=True)

##############################################################################
# .. currentmodule:: mlens.model_selection
#
# The ``make_scorer`` wrapper is a copy of Scikit-learn's
# :func:`sklearn.metrics.make_scorer`, and you can import the Scikit-learn
# version as well. Note however that to pickle the :class:`Evaluator`, you
# **must** import ``make_scorer`` from ``mlens``.
#
# A simple evaluation
# ^^^^^^^^^^^^^^^^^^^
#
# Before throwing preprocessing into the mix, let's see how to evaluate a set
# of estimators. First, we need a list of estimators and a dictionary of
# parameter distributions that maps to each estimator.
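##############################################################################
# For instance, a minimal sketch of such a list and dictionary (the estimator
# names and the distribution are illustrative, not part of the original text):

from scipy.stats import randint
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# GaussianNB has nothing to tune here, so only 'knn' gets a distribution.
pars = {'n_neighbors': randint(2, 20)}
params = {'knn': pars}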
        'colsample_bytree': uniform(0.6, 0.4),  # scipy uniform(loc, scale): range [0.6, 1.0]
        'reg_lambda': uniform(1, 2),
        'reg_alpha': uniform(1, 2),
    },
    'rf': {
        'max_depth': randint(2, 5),
        'min_samples_split': randint(5, 20),
        'min_samples_leaf': randint(10, 20),
        'n_estimators': randint(50, 100),
        'max_features': uniform(0.6, 0.3)
    }
}


# In[ ]:


scorer = make_scorer(mean_absolute_error, greater_is_better=False)

evl = Evaluator(
    scorer,
    cv=2,
    random_state=SEED,
    verbose=5,
)


# In[ ]:


evl.fit(
    xtrain, ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
try:
    from contextlib import redirect_stderr
except ImportError:
    from mlens.externals.fixes import redirect as redirect_stderr

np.random.seed(100)

# Stack is nonsense here - we just need proba to be false
X, y = Data('stack', False, False).get_data((100, 2), 20)


def failed_score(p, y):
    """Bad scoring function to test exception handling."""
    raise ValueError("This fails.")


mape_scorer = make_scorer(mape, greater_is_better=False)
bad_scorer = make_scorer(failed_score)


def test_check():
    """[Model Selection] Test check of valid estimator."""
    np.testing.assert_raises(ValueError, Evaluator, mape)


def test_raises():
    """[Model Selection] Test raises on error."""
    evl = Evaluator(bad_scorer)
    np.testing.assert_raises(ValueError, evl.fit,
pars_1 = {}

sc = StandardScaler()
pca = PCA()
fa = FactorAnalysis()
nmf = NMF()

pre_cases = {
    'case-1': [sc],
    # 'case-2': [sc],
    # 'case-3': [pca],
    # 'case-4': [fa]
}

score = make_scorer(score_func=accuracy_score, greater_is_better=True,
                    needs_proba=False, needs_threshold=False)

ensemble = SequentialEnsemble(model_selection=True, n_jobs=1,
                              shuffle=False, random_state=seed)
ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble

evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix, spacial_pix,
import numpy as np
from scipy.stats import uniform
from sklearn.linear_model import (LinearRegression, TheilSenRegressor,
                                  RANSACRegressor, Ridge, Lasso)
# from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator

seed = 2018
np.random.seed(seed)

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')

r2_scorer = make_scorer(r2_score)

ests = [('rdg', Ridge(max_iter=4000)), ('las', Lasso(max_iter=4000))]

a = uniform(0, 10)
params = {
    'rdg': {'alpha': a},
    'las': {'alpha': a}
}

preproc = {
    'none': [],
    'sc': [StandardScaler()]
}
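# A minimal sketch of running the search defined above (the function name is
# illustrative; X and y stand in for the commented-out image_set/label_set):
def evaluate_regularizers(X, y):
    evl = Evaluator(r2_scorer, cv=5, random_state=seed, verbose=1)
    evl.fit(X, y, estimators=ests, param_dicts=params,
            preprocessing=preproc, n_iter=20)
    return evl.results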
import numpy as np

# from sklearn.decomposition import PCA
from sklearn.ensemble import (RandomForestRegressor, ExtraTreesRegressor,
                              BaggingRegressor)
from sklearn.linear_model import (LinearRegression, LogisticRegression,
                                  Ridge, Lasso)
from sklearn.metrics import r2_score, accuracy_score
# from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVR

from mlens.metrics import make_scorer
from mlens.ensemble import SuperLearner

# np.random.seed() returns None, so keep the seed value itself for random_state.
seed = 2018
np.random.seed(seed)

# r2_score is a gain metric (higher is better), so greater_is_better=True.
scorer = make_scorer(r2_score, greater_is_better=True)


def build_ensemble(incl_meta, meta_type='log', preprocessors=None,
                   estimators=None, propagate_features=None):
    if propagate_features:
        n = len(propagate_features)
        propagate_features_1 = propagate_features
        propagate_features_2 = list(range(n))
    else:
        propagate_features_1 = propagate_features_2 = None

    if not estimators:
        estimators = [('rfr', RandomForestRegressor(random_state=seed)),
# Set parameter mapping.
# Here, we differentiate distributions between cases for the random forest.
params = {
    'svc': {'C': uniform(0, 10)},
    'class.rf': {'max_depth': randint(2, 10)},
    'proba.rf': {'max_depth': randint(2, 10),
                 'max_features': uniform(0.5, 0.5)}
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(X, y, meta_learners, params,
              preprocessing=preprocessing, n_iter=2)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)
# from sklearn.ensemble import RandomForestRegressor
from scipy.stats import uniform
from sklearn.linear_model import Lasso, Ridge, LinearRegression, LogisticRegression
from sklearn.metrics import r2_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
# from mlens.preprocessing import Subset

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')
# X_train, X_test, y_train, y_test = train_test_split(image_set, label_set, test_size=0.33)

# r2_score is a gain metric (higher is better), so greater_is_better=True.
score_f = make_scorer(score_func=r2_score, greater_is_better=True)
evaluator = Evaluator(scorer=score_f, shuffle=True, verbose=True)

estimators = [
    ('las', Lasso(copy_X=True, max_iter=4000)),
    ('rdg', Ridge(copy_X=True, max_iter=4000)),
    # ('rfr', RandomForestRegressor()),
]

params = {
    'las': {'alpha': uniform(0, 5)},
    'rdg': {'alpha': uniform(0, 5)