def _bench_ml(sensor, x_train, y_train, x_test, *, x_other=None, verbose=False,
              seed=42, bagging=True, gridsearch=False, scale=True, methods=None,
              **kwargs):
    """Benchmark a set of scikit-learn regressors against (x_train, y_train).

    Fits each model listed in `models` (optionally restricted via `methods`),
    optionally grid-searching hyperparameters and wrapping the estimator in a
    BaggingRegressor, then produces estimates on `x_test` (and `x_other`).

    Parameters
    ----------
    sensor     : sensor identifier (unused here; kept for interface parity
                 with sibling benchmark functions — TODO confirm).
    x_train    : 2D feature array used for fitting.
    y_train    : 1D or 2D target array; promoted to 2D if 1D.
    x_test     : feature array to generate estimates for.
    x_other    : optional second feature array to also estimate.
    verbose    : print gridsearch progress / best parameters.
    seed       : RNG seed; may be overridden by `kwargs['args'].seed`.
    bagging    : wrap each model in a BaggingRegressor ensemble.
    gridsearch : run GridSearchCV over each model's parameter grid.
    scale      : robust-scale x and log+minmax-scale y before fitting.
    methods    : optional subset of model names to run (default: all).

    Returns
    -------
    estim                : dict mapping method name -> estimates on x_test.
    (estim, other) tuple : when x_other is given, also estimates on x_other.
    """
    from sklearn.preprocessing import RobustScaler, MinMaxScaler
    from sklearn.model_selection import GridSearchCV
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import BaggingRegressor
    from .ML import models

    # BUG FIX: kwargs is a plain dict, so getattr(kwargs, 'args', None) could
    # never find the 'args' key — a caller-supplied args namespace (and its
    # seed / n_rounds) was silently ignored. Use dict lookup instead.
    args = kwargs.get('args', None)
    seed = getattr(args, 'seed', seed)

    gridsearch_kwargs = {'refit': False, 'scoring': 'neg_median_absolute_error'}
    bagging_kwargs = {
        'n_estimators': getattr(args, 'n_rounds', 10),
        'max_samples':  0.75,
        'bootstrap':    False,
        'random_state': seed,
    }

    # Ensure y is 2D so multi-output detection below works uniformly.
    if len(y_train.shape) == 1:
        y_train = y_train[:, None]

    # Drop any training sample containing a non-finite feature or target.
    valid   = np.isfinite(x_train).all(-1) & np.isfinite(y_train).all(-1)
    x_train = x_train[valid]
    y_train = y_train[valid]

    if scale:
        # x_scaler = TransformerPipeline([AUCTransformer(list(get_sensor_bands(sensor))), RobustScaler()])
        x_scaler = TransformerPipeline([RobustScaler()])
        y_scaler = TransformerPipeline([LogTransformer(), MinMaxScaler((-1, 1))])
        x_scaler.fit(x_train)
        y_scaler.fit(y_train)
        x_test  = x_scaler.transform(x_test)
        x_train = x_scaler.transform(x_train)
        y_train = y_scaler.transform(y_train)

    # Fit on copies so estimators cannot mutate the shared training arrays.
    preprocess  = lambda m: m.fit(x_train.copy(), y_train.copy())
    postprocess = None if not scale else y_scaler.inverse_transform

    if verbose and gridsearch:
        print('\nPerforming gridsearch...')

    if methods is None:
        methods = list(models.keys())

    other = {}
    estim = {}
    for method, params in models.items():
        if method not in methods:
            continue
        # Remove as we go; leftovers at the end are unknown method names.
        methods.remove(method)

        # NOTE: mutates the shared `models` dict in place (same as before).
        params['grid']['random_state'] = params['default']['random_state'] = seed
        model_kwargs = params['default']
        model_class  = params['class']
        n_jobs       = 1 if method == 'MDN' else 3

        if y_train.shape[1] > 1:
            # Bind the class through a default argument so the factory does not
            # shadow the outer args/kwargs nor late-bind the loop variable.
            model_class = lambda *a, _cls=params['class'], **kw: MultiOutputRegressor(_cls(*a, **kw))

        with GlobalRandomManager(seed):
            if gridsearch and method != 'SVM':
                model = GridSearchCV(model_class(), params['grid'],
                                     n_jobs=n_jobs, **gridsearch_kwargs)
                model.fit(x_train.copy(), y_train.copy())
                model_kwargs = model.best_params_
                if verbose:
                    print(f'Best {method} params: {model_kwargs}')

            model = model_class(**model_kwargs)
            if bagging:
                model = BaggingRegressor(model, **bagging_kwargs)
            model.__name__ = method

            estim[method] = _create_estimates(model, x_test, postprocess,
                                              preprocess, verbose=verbose, **kwargs)
            if x_other is not None:
                other[method] = _create_estimates(model, x_other, postprocess)

    if len(methods):
        print(f'Unknown ML benchmark methods requested: {methods}')

    if len(other):
        return estim, other
    return estim
def _bench_ml(sensor, x_train, y_train, x_test, *, x_other=None, verbose=False,
              seed=42, bagging=True, gridsearch=False, scale=True, **kwargs):
    """Benchmark all scikit-learn regressors in `models` on the given data.

    NOTE(review): this is a second definition of `_bench_ml` in the same file;
    at import time it shadows the earlier, more featureful variant (which adds
    `methods` filtering and multi-output support). Confirm which one is meant
    to survive.

    Parameters
    ----------
    sensor     : sensor identifier (unused here; kept for interface parity —
                 TODO confirm).
    x_train    : 2D feature array used for fitting.
    y_train    : target array; flattened after scaling (single-output only).
    x_test     : feature array to generate estimates for.
    x_other    : optional second feature array to also estimate.
    verbose    : print gridsearch progress / best parameters.
    seed       : RNG seed; may be overridden by `kwargs['args'].seed`.
    bagging    : wrap each model in a 10-estimator BaggingRegressor.
    gridsearch : run GridSearchCV over each model's parameter grid.
    scale      : robust-scale x and log+minmax-scale y before fitting.

    Returns
    -------
    estim                : dict mapping method name -> estimates on x_test.
    (estim, other) tuple : when x_other is given, also estimates on x_other.
    """
    from sklearn.preprocessing import RobustScaler, MinMaxScaler
    from sklearn.model_selection import GridSearchCV
    from sklearn.ensemble import BaggingRegressor
    from .Benchmarks.ML import models

    # BUG FIX: kwargs is a plain dict, so getattr(kwargs, 'args', None) always
    # returned None and a caller-supplied args.seed was silently ignored.
    seed = getattr(kwargs.get('args', None), 'seed', seed)

    gridsearch_kwargs = {'refit': False, 'scoring': 'neg_median_absolute_error'}
    bagging_kwargs = {
        'n_estimators': 10,
        'max_samples':  0.75,
        'bootstrap':    False,
        'random_state': seed,
    }

    if scale:
        x_scaler = TransformerPipeline([RobustScaler()])
        y_scaler = TransformerPipeline([LogTransformer(), MinMaxScaler((-1, 1))])
        x_scaler.fit(x_train)
        y_scaler.fit(y_train)
        x_test  = x_scaler.transform(x_test)
        x_train = x_scaler.transform(x_train)
        # Flatten: sklearn regressors expect 1D y for single-output fits.
        y_train = y_scaler.transform(y_train).flatten()

    # Fit on copies so estimators cannot mutate the shared training arrays.
    preprocess  = lambda m: m.fit(x_train.copy(), y_train.copy())
    postprocess = None if not scale else y_scaler.inverse_transform

    if verbose and gridsearch:
        print('\nPerforming gridsearch...')

    other = {}
    estim = {}
    for method, params in models.items():
        # NOTE: mutates the shared `models` dict in place (same as before).
        params['grid']['random_state'] = params['default']['random_state'] = seed
        model_kwargs = params['default']

        with GlobalRandomManager(seed):
            if gridsearch and method != 'SVM':
                # MDN must run single-process; everything else gets 3 workers.
                model = GridSearchCV(params['class'](), params['grid'],
                                     n_jobs=3 if method != 'MDN' else 1,
                                     **gridsearch_kwargs)
                model.fit(x_train.copy(), y_train.copy())
                model_kwargs = model.best_params_
                if verbose:
                    print(f'Best {method} params: {model_kwargs}')

            model = params['class'](**model_kwargs)
            if bagging:
                model = BaggingRegressor(model, **bagging_kwargs)
            model.__name__ = method

            estim[method] = _create_estimates(model, x_test, postprocess,
                                              preprocess, verbose=verbose, **kwargs)
            if x_other is not None:
                other[method] = _create_estimates(model, x_other, postprocess)

    if len(other):
        return estim, other
    return estim