def explain_rf_feature_importance(estimator,
                                  vec=None,
                                  top=_TOP,
                                  target_names=None,  # ignored
                                  targets=None,  # ignored
                                  feature_names=None,
                                  feature_re=None,
                                  feature_filter=None,
                                  ):
    """
    Return an explanation of a tree-based ensemble estimator.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.
    """
    importances = estimator.feature_importances_
    # The per-tree importances give an estimate of how much the
    # ensemble-level importance varies across individual trees.
    per_tree = [tree.feature_importances_
                for tree in np.array(estimator.estimators_).ravel()]
    importances_std = np.std(per_tree, axis=0)
    return get_feature_importance_explanation(
        estimator, vec, importances,
        coef_std=importances_std,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_RANDOM_FOREST,
        is_regression=isinstance(estimator, RegressorMixin),
    )
def explain_permutation_importance(estimator,
                                   vec=None,
                                   top=_TOP,
                                   target_names=None,  # ignored
                                   targets=None,  # ignored
                                   feature_names=None,
                                   feature_re=None,
                                   feature_filter=None,
                                   ):
    """
    Return an explanation of PermutationImportance.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.
    """
    # Regression vs. classification is decided by the estimator that
    # PermutationImportance wraps, not by the wrapper itself.
    regression = isinstance(estimator.wrapped_estimator_, RegressorMixin)
    return get_feature_importance_explanation(
        estimator, vec, estimator.feature_importances_,
        coef_std=estimator.feature_importances_std_,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_SCORE_DECREASE + estimator.caveats_,
        is_regression=regression,
    )
# Example #3  (separator artifact from a code-sharing site; vote count "0" removed from code path)
def explain_weights_xgboost(
    xgb,
    vec=None,
    top=20,
    target_names=None,  # ignored
    targets=None,  # ignored
    feature_names=None,
    feature_re=None,  # type: Pattern[str]
    feature_filter=None,
    importance_type='gain',
):
    """
    Return an explanation of an XGBoost estimator (via scikit-learn wrapper
    XGBClassifier or XGBRegressor, or via xgboost.Booster)
    as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'weight' - the number of times a feature is used to split the data
          across all trees
        - 'cover' - the average coverage of the feature when it is used in trees
    """
    # Works both for sklearn-wrapper estimators and for a raw Booster;
    # the helper normalizes to a Booster and tells us the task type.
    booster, is_regression = _check_booster_args(xgb)
    importances = _xgb_feature_importances(
        booster, importance_type=importance_type)
    return get_feature_importance_explanation(
        xgb,
        vec,
        importances,
        feature_names=feature_names,
        estimator_feature_names=booster.feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_XGBOOST,
        is_regression=is_regression,
        num_features=importances.shape[-1],
    )
def explain_weights_catboost(catb,
                             vec=None,
                             top=20,
                             importance_type='PredictionValuesChange',
                             feature_names=None,
                             pool=None,
                             feature_re=None,
                             feature_filter=None,
                             ):
    """
    Return an explanation of a CatBoost estimator (CatBoostClassifier,
    CatBoost, CatBoostRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'PredictionValuesChange' (default) - The individual importance
          values for each of the input features.
        - 'LossFunctionChange' - The individual importance values for
          each of the input features for ranking metrics
          (requires training data to be passed or a similar dataset with Pool)

    pool : catboost.Pool, optional
        To be passed if explain_weights_catboost has importance_type set
        to LossFunctionChange. The catboost feature_importances uses the Pool
        datatype to calculate the parameter for the specific importance_type.
    """
    is_regression = _is_regression(catb)
    catb_feature_names = catb.feature_names_
    coef = _catb_feature_importance(catb,
                                    importance_type=importance_type,
                                    pool=pool)
    return get_feature_importance_explanation(
        catb,
        vec,
        coef,
        feature_names=feature_names,
        estimator_feature_names=catb_feature_names,
        # Pass filters through (defaults preserve the old hard-coded None
        # behavior), consistent with the other explain_weights_* functions.
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_CATBOOST,
        is_regression=is_regression,
        num_features=coef.shape[-1])
# Example #5  (separator artifact from a code-sharing site; vote count "0" removed from code path)
def explain_weights_lightgbm(
    lgb,
    vec=None,
    top=20,
    target_names=None,  # ignored
    targets=None,  # ignored
    feature_names=None,
    feature_re=None,
    feature_filter=None,
    importance_type='gain',
):
    """
    Return an explanation of an LightGBM estimator (via scikit-learn wrapper
    LGBMClassifier or LGBMRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'split' - the number of times a feature is used to split the data
          across all trees
        - 'weight' - the same as 'split', for compatibility with xgboost
    """
    importances = _get_lgb_feature_importances(lgb, importance_type)
    # Feature names come from the underlying Booster, not the wrapper.
    booster_names = lgb.booster_.feature_name()
    regression = isinstance(lgb, lightgbm.LGBMRegressor)
    return get_feature_importance_explanation(
        lgb,
        vec,
        importances,
        feature_names=feature_names,
        estimator_feature_names=booster_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_LIGHTGBM,
        num_features=importances.shape[-1],
        is_regression=regression,
    )