def test_set_params_get_params():
    '''Check that double-underscore parameter names round-trip through
    set_params / get_params and that a single-underscore name is rejected
    with ValueError.'''
    pipeline = Pipeline(flat_poly_var_kmeans)
    requested = {
        'kmeans__n_clusters': 9,
        'poly__interaction_only': False,
        'var__threshold': 1e-8,
    }
    pipeline.set_params(**requested)
    current = pipeline.get_params()
    for name, expected in requested.items():
        assert name in current and current[name] == expected
    with pytest.raises(ValueError):
        # no double underscore
        pipeline.set_params(kmeans_n_clusters=9)
def _expand_scaler(label, factory, minmax_bounds):
    '''Return the (label, scaler-or-None) pairs for one entry of `scalers`.

    A label containing 'MinMax' yields one scaler per item of
    `minmax_bounds`; any other truthy factory yields a single scaler and a
    falsy factory yields a single (label, None) pair meaning "no scaling".
    '''
    if 'MinMax' in label:
        # One scaler per bounds tuple, labelled with the bounds it was
        # built from so the summaries stay distinguishable.
        return [(label + repr(bounds), factory(*bounds))
                for bounds in minmax_bounds]
    if factory:
        return [(label, factory())]
    return [(label, None)]


def ensemble_init_func(pipe, **kw):
    '''Create an ensemble of regression Pipelines, one for every
    combination of scaler, PCA component count and estimator.

    Each pipeline is made of the preamble steps, then an optional scaler
    (followed by the log step for 'MinMax' scalers), then an optional PCA
    step, then the estimator.  PCA is only added when a scaler is present
    and the component count is truthy.

    Parameters:
        pipe: Ignored
        **kw: Keyword arguments, all of them required:
            scalers:       List of (label, factory) tuples.  `factory` is
                           called to build the scaler: with no arguments,
                           or with `*bounds` for each item of
                           `minmax_bounds` when the label contains
                           'MinMax'.  A falsy factory means no scaling.
            minmax_bounds: Iterable of argument tuples, each unpacked into
                           the 'MinMax' scaler factory
            n_components:  List of PCA # of components to try.  May
                           include None to skip the PCA step
            pca:           Callable returning the PCA step; its parameters
                           are set through `pca__n_components`
            estimators:    List of (label, factory) tuples where
                           `factory()` returns the final estimator step
            preamble:      Callable returning the list of leading steps
            log:           Log transform step appended after a 'MinMax'
                           scaler
            summary:       String summary of preamble steps to
                           prepend to parameter summary

    Returns:
        ensemble: List of Pipeline instances, each with a `summary`
                  attribute.  Every summary is also printed.

    Raises:
        KeyError: if any of the keyword arguments above is missing

    Extra Pipeline keyword arguments are read from the module-level
    `pipeline_kw`.
    '''
    scalers = kw['scalers']
    n_components = kw['n_components']
    pca = kw['pca']
    estimators = kw['estimators']
    preamble = kw['preamble']
    summary_template = kw['summary']
    minmax_bounds = kw['minmax_bounds']
    log = kw['log']

    ensemble = []
    for base_label, scaler_factory in scalers:
        expanded = _expand_scaler(base_label, scaler_factory, minmax_bounds)
        for s_label, scale in expanded:
            # Test against None explicitly: a scaler object's truthiness is
            # not a reliable "is there a scaler" signal.
            has_scaler = scale is not None
            # Log transform only works with MinMaxScaler
            # and positive min bound
            use_log = 'MinMax' in s_label
            # NOTE(review): the same `scale` (and `log`) object is handed
            # to every pipeline built below for this label - confirm that
            # Pipeline copies its steps, otherwise fitted state is shared.
            for n_c in n_components:
                for e_label, estimator in estimators:
                    scale_step = [scale] if has_scaler else []
                    if use_log:
                        scale_step.append(log)
                    pca_step = [pca()] if n_c and has_scaler else []
                    new = Pipeline(
                        preamble() + scale_step + pca_step + [estimator()],
                        **pipeline_kw)
                    if pca_step:
                        new.set_params(pca__n_components=n_c)
                        msg = '{} components'.format(n_c)
                    else:
                        msg = ' (None)'
                    summary = ': Scaler: {} PCA: {} Estimator: {}'.format(
                        s_label, msg, e_label)
                    new.summary = summary_template + summary
                    print(new.summary)
                    ensemble.append(new)
    return ensemble