def explain_rf_feature_importance(estimator,
                                  vec=None,
                                  top=_TOP,
                                  target_names=None,  # ignored
                                  targets=None,  # ignored
                                  feature_names=None,
                                  feature_re=None,
                                  feature_filter=None,
                                  ):
    """
    Return an explanation of a tree-based ensemble estimator.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.
    """
    # Mean importances come straight from the fitted ensemble; the
    # standard deviation is computed across the individual trees.
    all_trees = np.array(estimator.estimators_).ravel()
    per_tree_importances = [tree.feature_importances_ for tree in all_trees]
    return get_feature_importance_explanation(
        estimator, vec, estimator.feature_importances_,
        coef_std=np.std(per_tree_importances, axis=0),
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_RANDOM_FOREST,
        is_regression=isinstance(estimator, RegressorMixin),
    )
def explain_permutation_importance(estimator,
                                   vec=None,
                                   top=_TOP,
                                   target_names=None,  # ignored
                                   targets=None,  # ignored
                                   feature_names=None,
                                   feature_re=None,
                                   feature_filter=None,
                                   ):
    """
    Return an explanation of PermutationImportance.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.
    """
    # Whether this is a regression task is decided by the estimator
    # wrapped inside PermutationImportance, not by the wrapper itself.
    wrapped = estimator.wrapped_estimator_
    return get_feature_importance_explanation(
        estimator, vec, estimator.feature_importances_,
        coef_std=estimator.feature_importances_std_,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_SCORE_DECREASE + estimator.caveats_,
        is_regression=isinstance(wrapped, RegressorMixin),
    )
def explain_weights_xgboost(xgb,
                            vec=None,
                            top=20,
                            target_names=None,  # ignored
                            targets=None,  # ignored
                            feature_names=None,
                            feature_re=None,  # type: Pattern[str]
                            feature_filter=None,
                            importance_type='gain',
                            ):
    """
    Return an explanation of an XGBoost estimator (via scikit-learn wrapper
    XGBClassifier or XGBRegressor, or via xgboost.Booster)
    as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'weight' - the number of times a feature is used to split the data
          across all trees
        - 'cover' - the average coverage of the feature when it is used in
          trees
    """
    # Accepts either a sklearn-style wrapper or a raw Booster; the helper
    # normalizes both and tells us whether this is a regression model.
    booster, is_regression = _check_booster_args(xgb)
    importances = _xgb_feature_importances(
        booster, importance_type=importance_type)
    return get_feature_importance_explanation(
        xgb, vec, importances,
        feature_names=feature_names,
        estimator_feature_names=booster.feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_XGBOOST,
        is_regression=is_regression,
        num_features=importances.shape[-1],
    )
def explain_weights_catboost(catb,
                             vec=None,
                             top=20,
                             importance_type='PredictionValuesChange',
                             feature_names=None,
                             pool=None,
                             feature_re=None,
                             feature_filter=None,
                             ):
    """
    Return an explanation of a CatBoost estimator (CatBoostClassifier,
    CatBoost, CatBoostRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'PredictionValuesChange' (default) - The individual importance
          values for each of the input features.
        - 'LossFunctionChange' - The individual importance values for
          each of the input features for ranking metrics
          (requires training data to be passed or a similar dataset with
          Pool)
    pool : catboost.Pool, optional
        To be passed if explain_weights_catboost has importance_type set
        to LossFunctionChange. The catboost feature_importances uses the
        Pool datatype to calculate the parameter for the specific
        importance_type.
    """
    is_regression = _is_regression(catb)
    catb_feature_names = catb.feature_names_
    coef = _catb_feature_importance(catb,
                                    importance_type=importance_type,
                                    pool=pool)
    # Forward feature_re / feature_filter (previously hard-coded to None
    # even though the docstring advertised them), matching the behavior of
    # the other explain_weights_* functions.
    return get_feature_importance_explanation(
        catb, vec, coef,
        feature_names=feature_names,
        estimator_feature_names=catb_feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_CATBOOST,
        is_regression=is_regression,
        num_features=coef.shape[-1],
    )
def explain_weights_lightgbm(lgb,
                             vec=None,
                             top=20,
                             target_names=None,  # ignored
                             targets=None,  # ignored
                             feature_names=None,
                             feature_re=None,
                             feature_filter=None,
                             importance_type='gain',
                             ):
    """
    Return an explanation of an LightGBM estimator (via scikit-learn wrapper
    LGBMClassifier or LGBMRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'split' - the number of times a feature is used to split the data
          across all trees
        - 'weight' - the same as 'split', for compatibility with xgboost
    """
    # Feature names are taken from the underlying Booster object so that
    # they match the importances array returned by the helper.
    importances = _get_lgb_feature_importances(lgb, importance_type)
    booster_feature_names = lgb.booster_.feature_name()
    return get_feature_importance_explanation(
        lgb, vec, importances,
        feature_names=feature_names,
        estimator_feature_names=booster_feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        description=DESCRIPTION_LIGHTGBM,
        num_features=importances.shape[-1],
        is_regression=isinstance(lgb, lightgbm.LGBMRegressor),
    )