Esempio n. 1
0
def _bench_ml(sensor,
              x_train,
              y_train,
              x_test,
              *,
              x_other=None,
              verbose=False,
              seed=42,
              bagging=True,
              gridsearch=False,
              scale=True,
              methods=None,
              **kwargs):
    """Fit the requested ML benchmark models and estimate targets for x_test.

    Rows of the training data containing non-finite values (in either the
    features or the targets) are dropped before anything is fitted.

    Parameters
    ----------
    sensor : not used by the active code in this function (it is only
        referenced by a commented-out scaler configuration).
    x_train, y_train : training features / targets. A 1-D ``y_train`` is
        promoted to a single-column 2-D array.
    x_test : features for which estimates are produced.
    x_other : optional second feature set; when given, estimates for it are
        produced with ``_create_estimates(model, x_other, postprocess)``.
        NOTE(review): unlike ``x_test``, ``x_other`` is not passed through
        ``x_scaler`` here -- confirm callers supply it already scaled.
    verbose : print progress information.
    seed : random seed; overridden by ``kwargs['args'].seed`` when present.
    bagging : wrap each model in a ``BaggingRegressor``.
    gridsearch : run a ``GridSearchCV`` over each model's grid (skipped for
        the ``'SVM'`` method) and use the best parameters found.
    scale : scale features with ``RobustScaler`` and targets with
        ``LogTransformer`` followed by ``MinMaxScaler((-1, 1))``.
    methods : iterable of method names to run; ``None`` runs every entry of
        ``models``. The caller's object is not modified.
    **kwargs : forwarded to ``_create_estimates``. An optional ``args`` entry
        may provide ``seed`` and ``n_rounds`` attributes.

    Returns
    -------
    dict mapping method name to the result of ``_create_estimates``; if any
    ``x_other`` estimates were produced, a tuple ``(estim, other)`` of two
    such dicts is returned instead.
    """
    from sklearn.preprocessing import RobustScaler, MinMaxScaler
    from sklearn.model_selection import GridSearchCV
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import BaggingRegressor

    from .ML import models

    # kwargs is a dict, so the optional `args` namespace has to be fetched by
    # key; getattr() on the dict itself would always yield the default.
    args = kwargs.get('args')
    seed = getattr(args, 'seed', seed)

    gridsearch_kwargs = {
        'refit': False,
        'scoring': 'neg_median_absolute_error'
    }
    bagging_kwargs = {
        'n_estimators': getattr(args, 'n_rounds', 10),
        'max_samples': 0.75,
        'bootstrap': False,
        'random_state': seed,
    }

    if len(y_train.shape) == 1:
        y_train = y_train[:, None]

    # Keep only samples whose features and targets are all finite.
    valid = np.isfinite(x_train).all(-1) & np.isfinite(y_train).all(-1)
    x_train = x_train[valid]
    y_train = y_train[valid]

    if scale:
        # x_scaler = TransformerPipeline([AUCTransformer(list(get_sensor_bands(sensor))), RobustScaler()])
        x_scaler = TransformerPipeline([RobustScaler()])
        y_scaler = TransformerPipeline(
            [LogTransformer(), MinMaxScaler((-1, 1))])
        x_scaler.fit(x_train)
        y_scaler.fit(y_train)
        x_test = x_scaler.transform(x_test)
        x_train = x_scaler.transform(x_train)
        y_train = y_scaler.transform(y_train)

    def preprocess(m):
        # Fit on copies so a model cannot alter the shared training arrays.
        return m.fit(x_train.copy(), y_train.copy())

    postprocess = y_scaler.inverse_transform if scale else None

    if verbose and gridsearch:
        print('\nPerforming gridsearch...')

    # Track the still-unmatched method names on a private copy, so the list
    # passed in by the caller is left intact.
    pending = list(models.keys()) if methods is None else list(methods)
    multi_output = y_train.shape[1] > 1

    other = {}
    estim = {}
    for method, params in models.items():
        if method not in pending:
            continue
        pending.remove(method)

        # Per-call copies: the shared `models` registry is not mutated, and
        # the grid entry is a one-element list because GridSearchCV requires
        # every grid value to be a sequence.
        model_kwargs = {**params['default'], 'random_state': seed}
        param_grid = {**params['grid'], 'random_state': [seed]}
        base_class = params['class']
        n_jobs = 1 if method == 'MDN' else 3

        if multi_output:
            def model_class(*m_args, _base=base_class, **m_kwargs):
                # One independent regressor per target column.
                return MultiOutputRegressor(_base(*m_args, **m_kwargs))
        else:
            model_class = base_class

        with GlobalRandomManager(seed):
            if gridsearch and method != 'SVM':
                # NOTE(review): for multi-output targets the grid keys are
                # still the bare estimator parameter names; MultiOutputRegressor
                # exposes them as `estimator__<name>` -- confirm this path.
                search = GridSearchCV(model_class(),
                                      param_grid,
                                      n_jobs=n_jobs,
                                      **gridsearch_kwargs)
                search.fit(x_train.copy(), y_train.copy())

                model_kwargs = search.best_params_
                if verbose:
                    print(f'Best {method} params: {model_kwargs}')

            model = model_class(**model_kwargs)
            if bagging:
                model = BaggingRegressor(model, **bagging_kwargs)

            model.__name__ = method
            estim[method] = _create_estimates(model,
                                              x_test,
                                              postprocess,
                                              preprocess,
                                              verbose=verbose,
                                              **kwargs)

            if x_other is not None:
                other[method] = _create_estimates(model, x_other, postprocess)

    if pending:
        print(f'Unknown ML benchmark methods requested: {pending}')

    if other:
        return estim, other
    return estim
Esempio n. 2
0
def _bench_ml(sensor,
              x_train,
              y_train,
              x_test,
              *,
              x_other=None,
              verbose=False,
              seed=42,
              bagging=True,
              gridsearch=False,
              scale=True,
              **kwargs):
    """Fit every ML benchmark model in ``models`` and estimate targets for x_test.

    Parameters
    ----------
    sensor : not used by this function body.
    x_train, y_train : training features / targets.
    x_test : features for which estimates are produced.
    x_other : optional second feature set; when given, estimates for it are
        produced with ``_create_estimates(model, x_other, postprocess)``.
        NOTE(review): unlike ``x_test``, ``x_other`` is not passed through
        ``x_scaler`` here -- confirm callers supply it already scaled.
    verbose : print progress information.
    seed : random seed; overridden by ``kwargs['args'].seed`` when present.
    bagging : wrap each model in a ``BaggingRegressor`` of 10 estimators.
    gridsearch : run a ``GridSearchCV`` over each model's grid (skipped for
        the ``'SVM'`` method) and use the best parameters found.
    scale : scale features with ``RobustScaler`` and targets with
        ``LogTransformer`` followed by ``MinMaxScaler((-1, 1))``; the scaled
        targets are flattened to 1-D.
    **kwargs : forwarded to ``_create_estimates``. An optional ``args`` entry
        may provide a ``seed`` attribute.

    Returns
    -------
    dict mapping method name to the result of ``_create_estimates``; if any
    ``x_other`` estimates were produced, a tuple ``(estim, other)`` of two
    such dicts is returned instead.
    """
    from sklearn.preprocessing import RobustScaler, MinMaxScaler
    from sklearn.model_selection import GridSearchCV
    from sklearn.ensemble import BaggingRegressor
    from .Benchmarks.ML import models

    # kwargs is a dict, so the optional `args` namespace has to be fetched by
    # key; getattr() on the dict itself would always yield the default.
    seed = getattr(kwargs.get('args'), 'seed', seed)
    gridsearch_kwargs = {
        'refit': False,
        'scoring': 'neg_median_absolute_error'
    }
    bagging_kwargs = {
        'n_estimators': 10,
        'max_samples': 0.75,
        'bootstrap': False,
        'random_state': seed,
    }

    if scale:
        x_scaler = TransformerPipeline([RobustScaler()])
        y_scaler = TransformerPipeline(
            [LogTransformer(), MinMaxScaler((-1, 1))])
        x_scaler.fit(x_train)
        y_scaler.fit(y_train)
        x_test = x_scaler.transform(x_test)
        x_train = x_scaler.transform(x_train)
        y_train = y_scaler.transform(y_train).flatten()

    def preprocess(m):
        # Fit on copies so a model cannot alter the shared training arrays.
        return m.fit(x_train.copy(), y_train.copy())

    postprocess = y_scaler.inverse_transform if scale else None

    if verbose and gridsearch:
        print('\nPerforming gridsearch...')

    other = {}
    estim = {}
    for method, params in models.items():
        # Per-call copies: the shared `models` registry is not mutated, and
        # the grid entry is a one-element list because GridSearchCV requires
        # every grid value to be a sequence.
        model_kwargs = {**params['default'], 'random_state': seed}
        param_grid = {**params['grid'], 'random_state': [seed]}
        model_class = params['class']

        with GlobalRandomManager(seed):
            if gridsearch and method != 'SVM':
                search = GridSearchCV(model_class(),
                                      param_grid,
                                      n_jobs=3 if method != 'MDN' else 1,
                                      **gridsearch_kwargs)
                search.fit(x_train.copy(), y_train.copy())

                model_kwargs = search.best_params_
                if verbose:
                    print(f'Best {method} params: {model_kwargs}')

            model = model_class(**model_kwargs)
            if bagging:
                model = BaggingRegressor(model, **bagging_kwargs)

            model.__name__ = method
            estim[method] = _create_estimates(model,
                                              x_test,
                                              postprocess,
                                              preprocess,
                                              verbose=verbose,
                                              **kwargs)

            if x_other is not None:
                other[method] = _create_estimates(model, x_other, postprocess)

    if other:
        return estim, other
    return estim