def get_params(self, deep=True):
    subclass_params = BaseEstimator.get_params(self, deep)
    # HACK: get base class params using sklearn's get_params...
    # Otherwise it's impossible to change baseclass params in cross
    # validation.
    c = copy.copy(self)
    c.__class__ = MatrixCompletion
    baseclass_params = BaseEstimator.get_params(c, deep)
    subclass_params.update(baseclass_params)
    return subclass_params
def get_params(self, deep=True, **kwargs):
    params = BaseEstimator.get_params(self, deep=deep, **kwargs)
    # Callback parameters are not returned by .get_params, so they need
    # special treatment.
    params_cb = self._get_params_callbacks(deep=deep)
    params.update(params_cb)
    return params
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['dimensions'] = self.dimensions
    params['noise'] = self.noise
    params['epsilon'] = self.epsilon
    logging.debug("Getting params: %s", str(params))
    return params
def has_cpu_params(estimator: BaseEstimator) -> bool:
    """Returns True if estimator has any CPU-related params."""
    return any(
        any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
        for param in estimator.get_params(deep=True))
def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    if self.compat:
        return BaseEstimator.get_params(self, deep)
    else:
        if self.estimator is not None:
            params = self.estimator.get_params(deep)
        else:
            # TODO: check if this is necessary
            params = dict()
            for p in self._get_param_names():
                params[p] = getattr(self, p, None)
        return params
def _step_db(estimator: BaseEstimator, ids: Tuple):
    estimator.is_fitted_ = True
    # build a flat tuple of (key, value) parameter pairs
    pms = ()
    estimator_params = estimator.get_params()
    for key in sorted(estimator_params.keys()):
        value = estimator_params[key]
        if isinstance(value, Callable):
            pms = pms + (key, value.__name__)
        if value == "warn":
            pms = pms + (key, 10)
        # discard parameters which are not json serializable
        try:
            json.dumps(value)
            pms = pms + (key, value)
        except TypeError:
            continue
    query = (
        json.dumps(estimator.train_),
        json.dumps(estimator.features_),
        json.dumps(pms),
        json.dumps(ids),
    )
    entry = (
        *query,
        pickle.dumps(estimator),
    )
    return query, entry
def __init__(self, model: BaseEstimator, multi_output: bool = False):
    name = type(model).__name__
    super().__init__(version=name)
    if multi_output:
        # n_jobs is keyword-only in recent scikit-learn releases
        model = MultiOutputRegressor(model, n_jobs=-1)
    self.model = model
    self.params = model.get_params()
def create_estimators(estimator: BaseEstimator, n_splits: int):
    # Create one copy of the model per fold for cross-validation.
    params = estimator.get_params()
    estimators = [estimator.__class__(**params) for _ in range(n_splits)]
    return estimators
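# A minimal usage sketch for create_estimators above (hypothetical context).
# Note that sklearn.base.clone is the library's canonical equivalent of
# re-instantiating an estimator from its get_params() output.
from sklearn.linear_model import Ridge

fold_models = create_estimators(Ridge(alpha=0.5), n_splits=5)
assert all(m.get_params() == fold_models[0].get_params() for m in fold_models)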
def set_cpu_params(estimator: BaseEstimator, num_cpus: int) -> None:
    """Sets all CPU-related params to num_cpus (incl. nested)."""
    cpu_params = {
        param: num_cpus
        for param in estimator.get_params(deep=True)
        if any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
    }
    estimator.set_params(**cpu_params)
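# Usage sketch for has_cpu_params / set_cpu_params above. The constant
# SKLEARN_CPU_PARAM_NAMES is assumed to look roughly like this; the actual
# names in the original module may differ.
SKLEARN_CPU_PARAM_NAMES = ["n_jobs"]

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier()
if has_cpu_params(clf):
    set_cpu_params(clf, num_cpus=4)  # sets clf.n_jobs (and any nested *__n_jobs) to 4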
def get_model_info(model: BaseEstimator):
    all_info = model.get_params()
    info = {
        key: value
        for key, value in all_info.items()
        if len(json.dumps(value, default=str)) < MAX_PARAMETER_VALUE_LENGTH
    }
    info['type'] = type(model).__name__
    return info
def update_param_if_supported(
    estimator: BaseEstimator,
    param_name: str,
    param_value: Any,
) -> BaseEstimator:
    current_params = estimator.get_params()
    if param_name in current_params:
        new_params = {**current_params, param_name: param_value}
        return type(estimator)(**new_params)
    return type(estimator)(**current_params)
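# Hypothetical usage of update_param_if_supported: a fresh estimator instance
# is returned either way, so the input estimator is never mutated.
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

lr = update_param_if_supported(LinearRegression(), "n_jobs", 2)       # applied
dt = update_param_if_supported(DecisionTreeRegressor(), "n_jobs", 2)  # ignored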
def auto_mlflow(
    run_name: str,
    model_name: BaseEstimator,
    data_params: dict = None,
    X: np.ndarray = "X_train",
    y: np.ndarray = "y_train",
) -> None:
    """
    Wrapper function that automates the application of mlflow to a model
    training event.

    Args:
        run_name (str): Desired name of the run; this will appear in the
            database
        model_name (BaseEstimator): The sklearn estimator object (must refer
            to an already instantiated model)
        data_params (dict, optional): Dictionary containing params on the
            data, e.g. {'standard_scaled': False}. Defaults to None.
        X (np.ndarray, optional): Feature array. Defaults to "X_train".
        y (np.ndarray, optional): Target array. Defaults to "y_train".

    Returns:
        None: Logs data to mlflow and prints a representation of the
        evaluation scores to the console
    """
    with mlflow.start_run(run_name=run_name):
        model_name.fit(X, y)
        no_val_rmse, no_val_r2, val_rmse_scores, cv_mean, cv_std, cv_cov = score_model(
            model_name, X, y
        )
        model_params = model_name.get_params()
        if data_params:  # mlflow.log_params fails on None
            mlflow.log_params(data_params)
        mlflow.log_params(model_params)
        mlflow.log_metrics(
            {
                "no_val_rmse": no_val_rmse,
                "no_val_r2": no_val_r2,
                "cv_score_1": val_rmse_scores[0],
                "cv_score_2": val_rmse_scores[1],
                "cv_score_3": val_rmse_scores[2],
                "cv_score_4": val_rmse_scores[3],
                "cv_score_5": val_rmse_scores[4],
                "cv_mean": cv_mean,
                "cv_std": cv_std,
                "cv_cov": cv_cov,
            }
        )
        mlflow.sklearn.log_model(model_name, "model")
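# Hypothetical call, assuming X_train, y_train and score_model are in scope:
# auto_mlflow(
#     run_name="rf_baseline",
#     model_name=RandomForestRegressor(),
#     data_params={"standard_scaled": False},
#     X=X_train,
#     y=y_train,
# )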
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """
    # TODO: Do K-fold cross validation to find the best hyperparameters
    # Notes:
    # - You can implement it yourself or use the built-in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters it has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    from copy import deepcopy as dc

    score_optimal = float('-inf')
    parameter = model.get_params()
    best_params = parameter.copy()
    for d in degree_range:
        for l in lambda_range:
            # Update both the nested objects and the flat double-underscore
            # keys so set_params sees consistent values.
            parameter['bostonfeaturestransformer'].degree = d
            parameter['bostonfeaturestransformer__degree'] = d
            parameter['linearregressor'].reg_lambda = l
            parameter['linearregressor__reg_lambda'] = l
            score = np.mean(
                sklearn.model_selection.cross_validate(
                    model.set_params(**parameter), X, y, cv=k_folds,
                    scoring='neg_mean_squared_error')['test_score'])
            # Record the best parameters before updating the best score;
            # otherwise the comparison is always False and best_params is
            # never updated.
            if score > score_optimal:
                score_optimal = score
                best_params = dc(parameter)
    # ========================
    return best_params
def get_params(self, deep=True):
    """
    Hack that overrides the get_params routine of BaseEstimator.
    self.get_params() returns the input parameters of __init__. However
    it doesn't handle inheritance well, as we would like to include the
    input parameters to __init__ of all the parents as well.

    :returns: params - dictionary of parameters and their user-set values
    """
    # This gets the params of OSPMRMP and puts them in a dictionary 'params'
    params = BaseEstimator.get_params(self)

    # This gets the parameters from _NN
    grandparent_init = super(_ONN, self).__init__
    grandparent_init_signature = signature(grandparent_init)

    parameters_nn = (
        p for p in grandparent_init_signature.parameters.values()
        if p.name != 'self' and p.kind != p.VAR_KEYWORD)

    for p in parameters_nn:
        if p.name in params:
            # Sanity check: raise (not return) on a duplicate parameter
            raise InputError('This should never happen')
        if hasattr(self, p.name):
            params[p.name] = getattr(self, p.name)
        else:
            params[p.name] = p.default

    # Adding the parameters from _ONN, but leaving kwargs out
    parent_init = _ONN.__init__
    parent_init_signature = signature(parent_init)

    parameters_onn = []
    for p in parent_init_signature.parameters.values():
        if p.name != 'self' and p.kind != p.VAR_KEYWORD:
            if p.name not in params:
                parameters_onn.append(p)

    for p in parameters_onn:
        if p.name in params:
            raise InputError('This should never happen')
        if p.name == 'kwargs':
            continue
        if hasattr(self, p.name):
            params[p.name] = getattr(self, p.name)
        else:
            params[p.name] = p.default

    return params
def _decompose_node(step: BaseEstimator, prune_default_params: bool = False):
    """
    Decompose a specific instance of a scikit-learn transformer,
    including Pipelines or FeatureUnions

    Parameters
    ----------
    step
        An instance of a Scikit-Learn transformer class
    prune_default_params
        Whether to output the default parameter values into the definition.
        If True, only those parameters differing from the default params
        will be output.

    Returns
    -------
    dict
        decomposed node - Where key is the import string for the class and
        associated value is a dict of parameters for that class.
    """
    import_str = f"{step.__module__}.{step.__class__.__name__}"
    params = step.get_params(deep=False)
    for param, param_val in params.items():
        if hasattr(param_val, "get_params"):
            params[param] = _decompose_node(param_val)
        # Handle parameter value that is a list
        elif isinstance(param_val, list):
            # Decompose second elements; these are tuples of
            # (str, BaseEstimator) or lists of other types such as ints.
            # TODO: Make this more robust, probably via another function to
            # TODO: parse the iterable recursively, b/c it _could_, in theory,
            # TODO: be a dict of {str: BaseEstimator} or similar.
            params[param] = [
                _decompose_node(leaf[1]) if isinstance(leaf, tuple) else leaf
                for leaf in param_val
            ]
        # Handle FunctionTransformer function object type parameters
        elif callable(param_val):
            # param_val is a function for FunctionTransformer.func init param
            params[param] = f"{param_val.__module__}.{param_val.__name__}"
        else:
            params[param] = param_val
    params = _prune_default_parameters(step, params) if prune_default_params else params
    return {import_str: params}
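# Hypothetical usage of _decompose_node on a small Pipeline; nested estimators
# in the 'steps' list are decomposed recursively into {import_str: params}.
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())])
definition = _decompose_node(pipe)
# roughly: {'sklearn.pipeline.Pipeline': {'memory': None, 'verbose': False,
#           'steps': [{'...StandardScaler': {...}}, {'...LogisticRegression': {...}}]}}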
def get_params(self, deep=True):
    """Get parameters for this estimator.

    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    out = BaseEstimator.get_params(self, deep=deep)
    out["forward_match"] = self.forward_match
    out["backward_match"] = self.backward_match
    return out
def __init__(self, model_type: str, model: BaseEstimator, data: pd.DataFrame,
             target: pd.Series, num_trials: int):
    if model_type not in ["regression", "classification"]:
        raise ValueError("model_type must be regression or classification")
    self.model_type = model_type
    self.model = model
    if self.is_pipeline():
        self.check_model_name()
    self.hyperparameters = model.get_params()
    self.data = data
    self.target = target
    self.num_trials = num_trials
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """
    # TODO: Do K-fold cross validation to find the best hyperparameters
    # Notes:
    # - You can implement it yourself or use the built-in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters it has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    DEGREE_PARAM = "bostonfeaturestransformer__degree"
    LAMBDA_PARAM = "linearregressor__reg_lambda"

    results = {}
    for degree in degree_range:
        for reg_lambda in lambda_range:
            params = model.get_params()
            params[DEGREE_PARAM] = degree
            params[LAMBDA_PARAM] = reg_lambda
            model.set_params(**params)
            scores = sklearn.model_selection.cross_val_score(
                model, X, y, scoring="neg_mean_squared_error", cv=k_folds)
            score = np.mean(scores)
            results[score] = params
    best_params = max(results.items(), key=lambda x: x[0])[1]
    # ========================
    return best_params
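# A sketch of the built-in alternative the notes above recommend: GridSearchCV
# searches the same double-underscore parameter names directly. The pipeline
# step names are taken from the exercise code; everything else here is an
# assumption, not the exercise's reference solution.
from sklearn.model_selection import GridSearchCV

def cv_best_hyperparams_gridsearch(model, X, y, k_folds,
                                   degree_range, lambda_range):
    grid = GridSearchCV(
        model,
        param_grid={
            "bostonfeaturestransformer__degree": list(degree_range),
            "linearregressor__reg_lambda": list(lambda_range),
        },
        scoring="neg_mean_squared_error",
        cv=k_folds,
    )
    grid.fit(X, y)
    return grid.best_params_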
def set_params(self, **params):
    BaseEstimator.set_params(self, **params)
    print(params)
    if 'use_feature_selection' in params:
        if not params['use_feature_selection']:
            print("FS is disabled")
            # With feature selection disabled, keep all features.
            BaseEstimator.set_params(self, k='all')
    print(BaseEstimator.get_params(self))
    # set_params is expected to return self per the sklearn contract
    return self
def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    # TODO: check if this causes problems for wrapped nested estimators
    params = BaseEstimator.get_params(self, deep=False)
    params.update({p: getattr(self, p) for p in self._param_names})
    return params
def get_hyperparameters(model: BaseEstimator, tag: str) -> pd.DataFrame:
    """Return a pandas DataFrame with the hyperparameters of the given model.

    :param model: Model to get hyperparameters from
    :type model: BaseEstimator
    :param tag: Name of the model to get hyperparameters for
    :type tag: str
    :return: A pandas DataFrame with the hyperparameters used in the
        given model.
    :rtype: pd.DataFrame
    """
    hp = dict()
    all_hp = model.get_params()
    if tag == "LR":
        for i in all_hp:
            if i in ["penalty", "solver", "max_iter", "C"]:
                hp[i] = all_hp[i]
    elif tag == "LDA":
        for i in all_hp:
            if i in ["solver", "shrinkage", "tol"]:
                hp[i] = all_hp[i]
    elif tag == "KNN":
        for i in all_hp:
            if i in ["n_neighbors", "weights", "algorithm", "leaf_size"]:
                hp[i] = all_hp[i]
    elif tag == "SVC":
        for i in all_hp:
            if i in [
                "kernel",
                "gamma",
                "C",
                "decision_function_shape",
                "probability",
            ]:
                hp[i] = all_hp[i]
    return pd.DataFrame.from_records([hp], index=["hyperparams"])
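# Hypothetical usage: pull the LR-relevant hyperparameters into a one-row frame.
from sklearn.linear_model import LogisticRegression

df = get_hyperparameters(LogisticRegression(max_iter=200), tag="LR")
# one row indexed "hyperparams" with columns C, max_iter, penalty, solver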
def get_model_info(model: BaseEstimator):
    info = model.get_params()
    info['type'] = type(model).__name__
    return info
def get_params_dict(estimator: BaseEstimator):
    params_dict = estimator.get_params(deep=False)
    return params_dict
def get_params(self, deep=True):
    return BaseEstimator.get_params(self, deep=deep)
def __init__(self, base_regressor: BaseEstimator, quantiles: list = None,
             quantile_range: Tuple = None, step: float = None, **base_params):
    """Initializes the QuantileRegressor instance.

    Initializes the quantile regressor by supplying the underlying sklearn
    estimator as well as fixed quantiles or a quantile range and step size.

    Args:
        base_regressor: The underlying sklearn estimator. Must implement a
            fit and predict method as well as accept loss and alpha
            parameters.
        quantiles, optional: List of quantiles on which the model should be
            trained. If no list is provided, the model falls back on the
            quantile_range and step parameters.
        quantile_range, optional: Tuple with a lower and higher quantile
            bound which provide a range for quantiles on which the model
            should be trained.
        step, optional: Step size which is used to create the model
            quantile range.
        **base_params: Optional keyword arguments which will be passed on
            to the ``base_regressor``.

    Examples:
        The below example illustrates how an instance of the
        QuantileRegressor class can be initialized with an sklearn
        GradientBoostingRegressor instance.

        >>> gbr = GradientBoostingRegressor()
        >>> quantile_reg = QuantileRegressor(gbr, quantiles=[0.4, 0.5, 0.55])
    """
    assert {'loss', 'alpha'}.issubset(base_regressor.get_params().keys()), \
        'Provided base_regressor instance doesn\'t accept quantile loss function.'
    assert quantiles is not None or (quantile_range is not None and step is not None), \
        'The variable quantiles or the variables quantile_range and step must be specified.'

    params = {'loss': 'quantile', 'alpha': 0.5}
    base_regressor = clone(base_regressor)
    base_regressor.set_params(**base_params)
    base_regressor.set_params(**params)

    self.base_regressor = base_regressor
    self.fit_quantiles = quantiles
    self.quantile_range = quantile_range
    self.step = step

    model_dict = {}
    self._quantiles = [0.5]
    model_dict['0.5'] = base_regressor
    self.model_dict = model_dict

    quantiles = (self.__quantile_creator()
                 if self.fit_quantiles is None and quantile_range is not None
                 and step is not None
                 else self.fit_quantiles)
    all_models = [self._create_model_from_quantile(q) for q in quantiles]
    for i in range(len(quantiles)):
        if quantiles[i] not in self.model_dict.keys():
            self.model_dict['{}'.format(quantiles[i])] = all_models[i]

    quantiles = self._quantiles + quantiles
    quantiles = list(set(quantiles))
    self._quantiles = sorted(quantiles)
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['max_dimensions'] = self.max_dimensions
    params['beta'] = self.beta
    params['C'] = self.C
    return params
def _get_dict_representation(o: BaseEstimator) -> dict:
    return {'type': o.__class__.__name__, 'params': o.get_params()}
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['beta'] = self.beta
    return params
def get_params(self, deep=True, **kwargs):
    return BaseEstimator.get_params(self, deep=deep, **kwargs)