Example #1
    def get_params(self, deep=True):
        subclass_params = BaseEstimator.get_params(self, deep)
        # HACK: get base class params using sklearn's get_params...
        # Otherwise it's impossible to change baseclass params in cross
        # validation.
        c = copy.copy(self)
        c.__class__ = MatrixCompletion
        baseclass_params = BaseEstimator.get_params(c, deep)

        subclass_params.update(baseclass_params)
        return subclass_params
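The __class__ swap above is generic: casting a shallow copy to the base class makes BaseEstimator.get_params inspect the base __init__ signature instead of the subclass's. A minimal self-contained sketch of the same trick, with hypothetical Base/Sub classes standing in for MatrixCompletion and its subclass:

import copy
from sklearn.base import BaseEstimator

class Base(BaseEstimator):
    def __init__(self, alpha=1.0):
        self.alpha = alpha

class Sub(Base):
    def __init__(self, beta=2.0, **kwargs):
        super().__init__(**kwargs)
        self.beta = beta

    def get_params(self, deep=True):
        params = BaseEstimator.get_params(self, deep)  # sees only 'beta'
        c = copy.copy(self)
        c.__class__ = Base                             # now sees 'alpha'
        params.update(BaseEstimator.get_params(c, deep))
        return params

Sub().get_params() now contains both 'alpha' and 'beta', so cross-validation tools such as GridSearchCV can tune the inherited parameter as well.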
Example #2
 def get_params(self, deep=True, **kwargs):
     params = BaseEstimator.get_params(self, deep=deep, **kwargs)
      # Callback parameters are not returned by .get_params and need
      # special treatment.
     params_cb = self._get_params_callbacks(deep=deep)
     params.update(params_cb)
     return params
Example #3
 def get_params(self, deep=True):
     params = BaseEstimator.get_params(self, deep)
     params['dimensions'] = self.dimensions
     params['noise'] = self.noise
     params['epsilon'] = self.epsilon
     logging.debug("Getting params: %s", str(params))
     return params
Example #4
def has_cpu_params(estimator: BaseEstimator) -> bool:
    """Returns True if estimator has any CPU-related params."""
    return any(
        any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
        for param in estimator.get_params(deep=True))
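SKLEARN_CPU_PARAM_NAMES is defined elsewhere in the module; below is a plausible, hypothetical definition and usage, assuming the conventional sklearn parameter name n_jobs:

SKLEARN_CPU_PARAM_NAMES = ("n_jobs",)  # assumed; the real module defines its own list

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge

has_cpu_params(RandomForestClassifier())  # True  -- exposes n_jobs
has_cpu_params(Ridge())                   # False -- no CPU-related params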
Example #5
    def get_params(self, deep=True):
        """ Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """

        if self.compat:
            return BaseEstimator.get_params(self, deep)

        else:
            if self.estimator is not None:
                params = self.estimator.get_params(deep)
            else:
                # TODO: check if this is necessary
                params = dict()

            for p in self._get_param_names():
                params[p] = getattr(self, p, None)

            return params
Example #6
def _step_db(estimator: BaseEstimator, ids: Tuple):
    estimator.is_fitted_ = True
    # build a flat tuple of JSON-serializable (key, value) pairs
    pms = ()
    estimator_params = estimator.get_params()
    for key in sorted(estimator_params.keys()):
        value = estimator_params[key]
        if isinstance(value, Callable):
            # functions are not JSON serializable; store their name instead
            pms = pms + (key, value.__name__)
            continue

        if value == "warn":
            # replace sklearn's "warn" sentinel with its effective default
            pms = pms + (key, 10)
            continue

        # discard parameters which are not json serializable
        try:
            json.dumps(value)
            pms = pms + (key, value)
        except TypeError:
            continue

    query = (
        json.dumps(estimator.train_),
        json.dumps(estimator.features_),
        json.dumps(pms),
        json.dumps(ids),
    )
    entry = (
        *query,
        pickle.dumps(estimator),
    )

    return query, entry
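The query/entry tuples are shaped for parameterized database statements. A hypothetical sqlite3 round trip, assuming the caller has already set the bookkeeping attributes train_ and features_ that _step_db serializes:

import sqlite3
from sklearn.linear_model import LinearRegression

est = LinearRegression()
est.train_ = [0, 1, 2]        # hypothetical bookkeeping expected by _step_db
est.features_ = ["x1", "x2"]
query, entry = _step_db(est, ids=(1, 2))

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE cache (train, features, params, ids, model)")
con.execute("INSERT INTO cache VALUES (?, ?, ?, ?, ?)", entry)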
Example #7
    def __init__(self, model: BaseEstimator, multi_output: bool = False):
        name = type(model).__name__
        super().__init__(version=name)
        if multi_output:
            # n_jobs=-1: fit the per-target regressors on all CPU cores
            model = MultiOutputRegressor(model, n_jobs=-1)

        self.model = model
        self.params = model.get_params()
Example #8
 def create_estimators(
         estimator: BaseEstimator,
         n_splits: int
 ):
     # Create one unfitted model per cross-validation split
     params = estimator.get_params()
     estimators = [estimator.__class__(**params) for _ in range(n_splits)]
     return estimators
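Since params comes from get_params() on the same class, re-instantiating with estimator.__class__(**params) yields unfitted copies with identical hyperparameters. A quick check of the helper; note that sklearn.base.clone does the same job in one call:

from sklearn.tree import DecisionTreeClassifier

folds = create_estimators(DecisionTreeClassifier(max_depth=3), n_splits=5)
assert all(e.get_params() == folds[0].get_params() for e in folds)
assert folds[0] is not folds[1]  # independent, unfitted instances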
Example #9
def set_cpu_params(estimator: BaseEstimator, num_cpus: int) -> None:
    """Sets all CPU-related params to num_cpus (incl. nested)."""
    cpu_params = {
        param: num_cpus
        for param in estimator.get_params(deep=True) if any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
    }
    estimator.set_params(**cpu_params)
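Usage sketch, under the same assumption as above that SKLEARN_CPU_PARAM_NAMES contains "n_jobs". Because get_params(deep=True) also yields nested keys such as estimator__n_jobs, one call reconfigures the whole estimator tree:

from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier

est = MultiOutputClassifier(RandomForestClassifier())
set_cpu_params(est, num_cpus=4)
assert est.n_jobs == 4 and est.estimator.n_jobs == 4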
Example #10
def get_model_info(model: BaseEstimator):
    all_info = model.get_params()
    info = {
        key: value
        for key, value in all_info.items()
        if len(json.dumps(value, default=str)) < MAX_PARAMETER_VALUE_LENGTH
    }
    info['type'] = type(model).__name__
    return info
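MAX_PARAMETER_VALUE_LENGTH is a module-level constant not shown here; 250 is a plausible cap (a limit some tracking backends have imposed on logged param values). A hedged usage sketch with that assumed value:

MAX_PARAMETER_VALUE_LENGTH = 250  # assumed value

from sklearn.svm import SVC
get_model_info(SVC(C=10))
# {'C': 10, 'kernel': 'rbf', ..., 'type': 'SVC'}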
Example #11
def update_param_if_supported(
        estimator: BaseEstimator,
        param_name: str,
        param_value: Any,
) -> BaseEstimator:
    # deep=False: nested "step__param" keys would not be valid __init__ kwargs
    current_params = estimator.get_params(deep=False)
    if param_name in current_params:
        new_params = {**current_params, param_name: param_value}
        return type(estimator)(**new_params)
    return type(estimator)(**current_params)
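A short usage sketch; the helper returns a fresh instance rather than mutating its argument:

from sklearn.ensemble import RandomForestClassifier

rf = update_param_if_supported(RandomForestClassifier(), "n_estimators", 500)
assert rf.get_params()["n_estimators"] == 500
# Unsupported names fall through to an unchanged copy:
rf2 = update_param_if_supported(rf, "no_such_param", 1)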
Example #12
def auto_mlflow(
    run_name: str,
    model_name: BaseEstimator,
    data_params: dict = None,
    X: np.ndarray = None,
    y: np.ndarray = None,
) -> None:
    """
    Wrapper function that automates the application of mlflow to a model training event.

    Args:
        run_name (str): Desired name of the run, this will appear in the database
        model_name (BaseEstimator): Variable name of the sklearn estimator object
                                    (must refer to an already instantiated model)
        data_params (dict, optional): Dictionary containing params on the data
                                    e.g. {'standard_scaled': False}. Defaults to None.
        X (np.ndarray, optional): Feature array. Defaults to "X_train".
        y (np.ndarray, optional): Target array. Defaults to "y_train".

    Returns:
        str: Logs data to mlflow, also prints representation of evaluation scores to console
    """

    with mlflow.start_run(run_name=run_name):

        model_name.fit(X, y)

        no_val_rmse, no_val_r2, val_rmse_scores, cv_mean, cv_std, cv_cov = score_model(
            model_name, X, y
        )

        model_params = model_name.get_params()

        if data_params:
            mlflow.log_params(data_params)
        mlflow.log_params(model_params)

        mlflow.log_metrics(
            {
                "no_val_rmse": no_val_rmse,
                "no_val_r2": no_val_r2,
                "cv_score_1": val_rmse_scores[0],
                "cv_score_2": val_rmse_scores[1],
                "cv_score_3": val_rmse_scores[2],
                "cv_score_4": val_rmse_scores[3],
                "cv_score_5": val_rmse_scores[4],
                "cv_mean": cv_mean,
                "cv_std": cv_std,
                "cv_cov": cv_cov,
            }
        )

        mlflow.sklearn.log_model(model_name, "model")

    return None
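Hypothetical usage, assuming score_model is the external helper that returns the RMSE/R^2 values and five cross-validation scores unpacked above, and that X_train/y_train already exist:

from sklearn.linear_model import Ridge

auto_mlflow(
    "ridge-baseline",
    Ridge(alpha=1.0),
    data_params={"standard_scaled": True},
    X=X_train,
    y=y_train,
)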
Example #13
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters it has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    from copy import deepcopy as dc
    score_optimal = float('-inf')

    parameter = model.get_params()
    best_params = parameter.copy()

    for d in degree_range:
        for l in lambda_range:
            # Keep the nested estimator objects and their dotted parameter
            # names in sync so set_params and the returned dict agree.
            parameter['bostonfeaturestransformer'].degree = d
            parameter['bostonfeaturestransformer__degree'] = d
            parameter['linearregressor'].reg_lambda = l
            parameter['linearregressor__reg_lambda'] = l
            score = np.mean(
                sklearn.model_selection.cross_validate(
                    model.set_params(**parameter),
                    X,
                    y,
                    cv=k_folds,
                    scoring='neg_mean_squared_error')['test_score'])
            # Update score and params together: the original advanced
            # score_optimal first, so best_params was never refreshed.
            if score > score_optimal:
                score_optimal = score
                best_params = dc(parameter)

    # ========================

    return best_params
Example #14
    def get_params(self, deep=True):
        """
        Hack that overrides the get_params routine of BaseEstimator.
        self.get_params() returns the input parameters of __init__. However it doesn't
        handle inheritance well, as we would like to include the input parameters to
        __init__ of all the parents as well.

        :returns: params - dictionary of parameters and their user-set value

        """
        # This gets the params of OSPMRMP and puts them in a dictionary 'params'
        params = BaseEstimator.get_params(self)

        # This gets the parameters from _NN (the parent of _ONN in the MRO)
        grandparent_init = super(_ONN, self).__init__
        grandparent_init_signature = signature(grandparent_init)

        parameters_nn = (
            p for p in grandparent_init_signature.parameters.values()
            if p.name != 'self' and p.kind != p.VAR_KEYWORD)

        for p in parameters_nn:
            if p.name in params:
                raise InputError('This should never happen')

            if hasattr(self, p.name):
                params[p.name] = getattr(self, p.name)
            else:
                params[p.name] = p.default

        # Adding the parameters from _ONN, but leaving kwargs out
        parent_init = _ONN.__init__
        parent_init_signature = signature(parent_init)

        parameters_onn = []
        for p in parent_init_signature.parameters.values():
            if p.name != 'self' and p.kind != p.VAR_KEYWORD:
                if p.name not in params:
                    parameters_onn.append(p)

        for p in parameters_onn:
            if p.name in params:
            raise InputError('This should never happen')

            if p.name == 'kwargs':
                continue

            if hasattr(self, p.name):
                params[p.name] = getattr(self, p.name)
            else:
                params[p.name] = p.default

        return params
Example #15
def _decompose_node(step: BaseEstimator, prune_default_params: bool = False):
    """
    Decompose a specific instance of a scikit-learn transformer,
    including Pipelines or FeatureUnions

    Parameters
    ----------
    step
        An instance of a Scikit-Learn transformer class
    prune_default_params
        Whether to output the default parameter values into the definition. If True,
        only those parameters differing from the default params will be output.

    Returns
    -------
    dict
        decomposed node - Where key is the import string for the class and associated value
        is a dict of parameters for that class.
    """

    import_str = f"{step.__module__}.{step.__class__.__name__}"
    params = step.get_params(deep=False)

    for param, param_val in params.items():

        if hasattr(param_val, "get_params"):
            params[param] = _decompose_node(param_val)

        # Handle parameter value that is a list
        elif isinstance(param_val, list):

            # Decompose second elements; these are tuples of (str, BaseEstimator)
            # or list of other types such as ints.
            # TODO: Make this more robust, probably via another function to parse the iterable recursively
            # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar.
            params[param] = [
                _decompose_node(leaf[1]) if isinstance(leaf, tuple) else leaf
                for leaf in param_val
            ]

        # Handle FunctionTransformer function object type parameters
        elif callable(param_val):
            # param_val is a function for FunctionTransformer.func init param
            params[param] = f"{param_val.__module__}.{param_val.__name__}"

        else:
            params[param] = param_val

    if prune_default_params:
        params = _prune_default_parameters(step, params)
    return {import_str: params}
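Usage sketch on a small Pipeline; note that the (name, transformer) step names are dropped in the output, keeping only import strings and parameters:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

pipe = Pipeline([("scale", StandardScaler()), ("pca", PCA(n_components=2))])
_decompose_node(pipe)
# {'sklearn.pipeline.Pipeline': {
#     'memory': None,
#     'steps': [{'sklearn.preprocessing._data.StandardScaler': {...}},
#               {'sklearn.decomposition._pca.PCA': {'n_components': 2, ...}}],
#     'verbose': False}}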
Example #16
 def get_params(self, deep=True):
     """Get parameters for this estimator.
     Parameters
     ----------
     deep : boolean, optional
         If True, will return the parameters for this estimator and
         contained subobjects that are estimators.
     Returns
     -------
     params : mapping of string to any
         Parameter names mapped to their values.
     """
     out = BaseEstimator.get_params(self, deep=deep)
     out["forward_match"] = self.forward_match
     out["backward_match"] = self.backward_match
Example #17
 def __init__(self,
              model_type: str,
              model: BaseEstimator,
              data: pd.DataFrame,
              target: pd.Series,
              num_trials: int):
      if model_type not in ["regression", "classification"]:
          raise ValueError("model_type must be 'regression' or 'classification'")
     self.model_type = model_type
     self.model = model
     if self.is_pipeline():
         self.check_model_name()
     self.hyperparameters = model.get_params()
     self.data = data
     self.target = target
     self.num_trials = num_trials
Example #18
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters it has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    DEGREE_PARAM = "bostonfeaturestransformer__degree"
    LAMBDA_PARAM = "linearregressor__reg_lambda"

    results = {}
    for degree in degree_range:
        for reg_lambda in lambda_range:
            params = model.get_params()
            params[DEGREE_PARAM] = degree
            params[LAMBDA_PARAM] = reg_lambda
            model.set_params(**params)
            scores = sklearn.model_selection.cross_val_score(
                model, X, y, scoring="neg_mean_squared_error", cv=k_folds)
            score = np.mean(scores)
            # Keyed by score; ties overwrite, keeping the most recent params.
            results[score] = params

    best_params = max(results.items(), key=lambda x: x[0])[1]

    # ========================

    return best_params
Example #19
    def set_params(self, **params):
        BaseEstimator.set_params(self, **params)

        if 'use_feature_selection' in params:
            if not params['use_feature_selection']:
                # Feature selection is disabled, so keep all features.
                BaseEstimator.set_params(self, k='all')

        # set_params conventionally returns self so that calls can be chained.
        return self
Example #20
    def get_params(self, deep=True):
        """ Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """

        # TODO: check if this causes problems for wrapped nested estimators
        params = BaseEstimator.get_params(self, deep=False)
        params.update({p: getattr(self, p) for p in self._param_names})

        return params
Example #21
    def get_hyperparameters(model: BaseEstimator, tag: str) -> pd.DataFrame:
        """Return a pandas DataFrame with the hyperparameters of the given model.

        :param model: Model to get hyperparameters
        :type model: BaseEstimator
        :param tag: Name of the model to get hyperparameters
        :type tag: str
        :return: A pandas DataFrame with the hyperparameters used in the given model.
        :rtype: pd.DataFrame
        """
        hp = dict()
        all_hp = model.get_params()
        if tag == "LR":
            for i in all_hp:
                if i in ["penalty", "solver", "max_iter", "C"]:
                    hp[i] = all_hp[i]
        elif tag == "LDA":
            for i in all_hp:
                if i in ["solver", "shrinkage", "tol"]:
                    hp[i] = all_hp[i]
        elif tag == "KNN":
            for i in all_hp:
                if i in ["n_neighbors", "weights", "algorithm, leaf_size"]:
                    hp[i] = all_hp[i]
        elif tag == "SVC":
            for i in all_hp:
                if i in [
                        "kernel",
                        "gamma",
                        "C",
                        "decision_function_shape",
                        "probability",
                ]:
                    hp[i] = all_hp[i]

        return pd.DataFrame.from_records([hp], index=["hyperparams"])
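Usage sketch, assuming the method is reachable as a plain function (it takes no self):

from sklearn.linear_model import LogisticRegression

get_hyperparameters(LogisticRegression(max_iter=500), "LR")
#                C  max_iter penalty solver
# hyperparams  1.0       500      l2  lbfgs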
Example #22
def get_model_info(model: BaseEstimator):
    info = model.get_params()
    info['type'] = type(model).__name__
    return info
Example #23
def get_params_dict(estimator: BaseEstimator):
    params_dict = estimator.get_params(deep=False)
    return params_dict
Example #24
 def get_params(self, deep=True):
     return BaseEstimator.get_params(self, deep=deep)
Example #25
    def __init__(self,
                 base_regressor: BaseEstimator,
                 quantiles: list = None,
                 quantile_range: Tuple = None,
                 step: float = None,
                 **base_params):
        """Initializes the QuantileRegressor instance.
        Initializes the quantile regressor by supplying the underlying
        sklearn estimator as well as fixed quantiles or a quantile range
        and step size.
        Args:
            base_regressor: The underlying sklearn estimator.
              Must implement a fit and predict method as well as accept loss and alpha parameters.
            quantiles, optional: List of quantiles on which the model should be trained.
              If no list is provided, the model falls back on the quantile_range and step parameters.
            quantile_range, optional: Tuple with a lower and an upper quantile bound which
              provides a range of quantiles on which the model should be trained.
            step, optional: Step size which is used to create the model quantile range.
            **base_params: Optional keyword arguments which will be passed on
              to the ``base_regressor``.
        Examples:
            The example below illustrates how an instance of the
            QuantileRegressor class can be initialized with a
            sklearn GradientBoostingRegressor instance.
            >>> gbr = GradientBoostingRegressor()
            >>> quantile_reg = QuantileRegressor(gbr, quantiles=[0.4, 0.5, 0.55])
        """

        assert {'loss', 'alpha'}.issubset(base_regressor.get_params().keys()), \
                'Provided base_regressor instance doesn\'t accept quantile loss function.'

        assert quantiles is not None or (quantile_range is not None and step is not None), \
                'The parameter quantiles or the parameters quantile_range and step must be specified.'

        params = {'loss': 'quantile', 'alpha': 0.5}

        base_regressor = clone(base_regressor)
        base_regressor.set_params(**base_params)
        base_regressor.set_params(**params)

        self.base_regressor = base_regressor
        self.fit_quantiles = quantiles
        self.quantile_range = quantile_range
        self.step = step

        model_dict = {}
        self._quantiles = [0.5]
        model_dict['0.5'] = base_regressor
        self.model_dict = model_dict

        if self.fit_quantiles is None and quantile_range is not None and step is not None:
            quantiles = self.__quantile_creator()
        else:
            quantiles = self.fit_quantiles

        all_models = [self._create_model_from_quantile(q) for q in quantiles]

        for q, m in zip(quantiles, all_models):
            # Compare against the string keys actually stored in model_dict;
            # the original float-vs-string check was always True, so the
            # 0.5 base model was silently overwritten.
            if str(q) not in self.model_dict:
                self.model_dict[str(q)] = m

        self._quantiles = sorted(set(self._quantiles + quantiles))
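Hypothetical usage mirroring the docstring example; _create_model_from_quantile and __quantile_creator are other methods of the class, not shown here:

from sklearn.ensemble import GradientBoostingRegressor

qr = QuantileRegressor(GradientBoostingRegressor(), quantiles=[0.1, 0.5, 0.9])
sorted(qr.model_dict)  # ['0.1', '0.5', '0.9']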
Example #26
 def get_params(self, deep=True):
     params = BaseEstimator.get_params(self, deep)
     params['max_dimensions'] = self.max_dimensions
     params['beta'] = self.beta
     params['C'] = self.C
     return params
Example #27
def _get_dict_representation(o: BaseEstimator) -> dict:
    return {'type': o.__class__.__name__, 'params': o.get_params()}
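Usage sketch:

from sklearn.cluster import KMeans

_get_dict_representation(KMeans(n_clusters=8))
# {'type': 'KMeans', 'params': {'n_clusters': 8, 'init': 'k-means++', ...}}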
Example #28
 def get_params(self, deep=True):
     params = BaseEstimator.get_params(self, deep)
     params['beta'] = self.beta
     return params
Example #29
 def get_params(self, deep=True, **kwargs):
     return BaseEstimator.get_params(self, deep=deep, **kwargs)
Example #30
 def get_params(self, deep=True):
     return BaseEstimator.get_params(self, deep=deep)