Ejemplo n.º 1
0
def test_set_params_get_params():
    '''Check that "step__param" style names round-trip through
    set_params / get_params, and that a name written with a single
    underscore is rejected with ValueError'''
    pipeline = Pipeline(flat_poly_var_kmeans)
    expected = {
        'kmeans__n_clusters': 9,
        'poly__interaction_only': False,
        'var__threshold': 1e-8,
    }
    pipeline.set_params(**expected)
    reported = pipeline.get_params()
    for name, value in expected.items():
        # Every value that was set must be reported back unchanged
        assert name in reported
        assert reported[name] == value
    # Single underscore between step name and parameter name is invalid
    with pytest.raises(ValueError):
        pipeline.set_params(kmeans_n_clusters=9)
def ensemble_init_func(pipe, **kw):
    '''Build a list of Pipeline objects, one for every combination of
    scaler variant, PCA component count and estimator.

    Each Pipeline is assembled as: preamble steps, then an optional
    scaler, then (for "MinMax" scalers only) the log step, then an
    optional PCA step, then the estimator.  A one-line summary is
    attached to each Pipeline as ``.summary`` and printed.

    Parameters:
        pipe: Ignored
        **kw: Keyword arguments (all are required):
            scalers:       List of (label, scaler_factory) tuples.  The
                           factory is called to build the scaler step; a
                           falsy factory means "no scaling".  When the
                           label contains 'MinMax' the factory is called
                           once per entry of minmax_bounds as
                           scaler_factory(*bounds).
            minmax_bounds: List of argument tuples for 'MinMax' scaler
                           factories
            n_components:  List of PCA component counts to try.  A falsy
                           entry (e.g. None) skips the PCA step; PCA is
                           also skipped when there is no scaler.
            pca:           Callable returning the PCA step.  The step
                           must be addressable as "pca" because
                           pca__n_components is set on the Pipeline.
            estimators:    List of (label, estimator_factory) tuples;
                           the factory is called for each Pipeline
            preamble:      Callable returning the list of leading steps
            log:           Log transform step, appended after the scaler
                           when the scaler label contains 'MinMax'
            summary:       String describing the preamble steps,
                           prepended to each parameter summary

    Returns:
        ensemble: List of Pipeline instances
    '''
    # Read every required keyword up front so a missing one fails early
    scaler_specs = kw['scalers']
    component_counts = kw['n_components']
    make_pca = kw['pca']
    estimator_specs = kw['estimators']
    make_preamble = kw['preamble']
    summary_prefix = kw['summary']
    bounds_choices = kw['minmax_bounds']
    log_step = kw['log']

    pipelines = []
    for base_label, scaler_factory in scaler_specs:
        # Expand one scaler spec into its concrete (label, scaler) variants
        if 'MinMax' in base_label:
            # One MinMax scaler per bounds entry, labelled with the bounds
            variant_labels = [base_label + repr(b) for b in bounds_choices]
            variant_scalers = [scaler_factory(*b) for b in bounds_choices]
            variants = list(zip(variant_labels, variant_scalers))
        elif scaler_factory:
            # Any other scaler is built with its default arguments
            variants = [(base_label, scaler_factory())]
        else:
            # No scaling at all
            variants = [(base_label, None)]

        for label, scaler in variants:
            # The log transform is only paired with MinMax scalers
            # (it needs a positive lower bound)
            wants_log = 'MinMax' in label
            for n_comp in component_counts:
                for est_label, estimator_factory in estimator_specs:
                    scaling_steps = []
                    if scaler:
                        scaling_steps.append(scaler)
                    if wants_log:
                        scaling_steps.append(log_step)

                    # PCA needs both a component count and a scaler
                    if n_comp and scaler:
                        pca_steps = [make_pca()]
                    else:
                        pca_steps = []

                    all_steps = make_preamble() + scaling_steps + pca_steps
                    all_steps = all_steps + [estimator_factory()]
                    candidate = Pipeline(all_steps, **pipeline_kw)

                    if pca_steps:
                        candidate.set_params(pca__n_components=n_comp)
                        pca_text = '{} components'.format(n_comp)
                    else:
                        pca_text = ' (None)'

                    description = ': Scaler: {} PCA: {} Estimator: {}'.format(
                        label, pca_text, est_label)
                    candidate.summary = summary_prefix + description
                    print(candidate.summary)
                    pipelines.append(candidate)
    return pipelines