Code example #1
File: explain_prediction.py  Project: zanachka/eli5
def explain_prediction_tree_regressor(reg,
                                      doc,
                                      vec=None,
                                      top=None,
                                      top_targets=None,
                                      target_names=None,
                                      targets=None,
                                      feature_names=None,
                                      feature_re=None,
                                      feature_filter=None,
                                      vectorized=False):
    """ Explain prediction of a tree regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble (or a single tree for DecisionTreeRegressor).
    Each node of the tree has an output score, and contribution of a feature
    on the decision path is how much the score changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """
    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Tree estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    score, = reg.predict(X)
    num_targets = getattr(reg, 'n_outputs_', 1)
    is_multitarget = num_targets > 1
    feature_weights = _trees_feature_weights(reg, X, feature_names,
                                             num_targets)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def _weights(label_id, scale=1.0):
        weights = feature_weights[:, label_id]
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    res = Explanation(
        estimator=repr(reg),
        method='decision path',
        description=(DESCRIPTION_TREE_REG_MULTITARGET
                     if is_multitarget else DESCRIPTION_TREE_REG),
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    names = get_default_target_names(reg, num_targets=num_targets)
    display_names = get_target_display_names(names, target_names, targets,
                                             top_targets, score)

    if is_multitarget:
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
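
A minimal usage sketch for this explainer, assuming a recent scikit-learn (the diabetes dataset, tree depth, and ``top`` value are arbitrary illustration choices):

import eli5
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)
reg = DecisionTreeRegressor(max_depth=3).fit(X, y)

# eli5.explain_prediction dispatches to explain_prediction_tree_regressor
# for tree regressors; the feature weights (including '<BIAS>') sum to
# the predicted value for this sample.
expl = eli5.explain_prediction(reg, X[0], top=5)
print(eli5.format_as_text(expl))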
Code example #2
File: explain_prediction.py  Project: zanachka/eli5
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        top_targets=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        feature_re=None,
                                        feature_filter=None,
                                        vectorized=False):
    """
    Explain prediction of a linear regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``;
    you can pass it instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor ``reg``. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.
    """
    if isinstance(reg, (SVR, NuSVR)) and reg.kernel != 'linear':
        return explain_prediction_sklearn_not_supported(reg, doc)

    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    score, = reg.predict(X)

    if has_intercept(reg):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    _weights = _linear_weights(reg, x, top, feature_names, flt_indices)
    names = get_default_target_names(reg)
    display_names = get_target_display_names(names, target_names, targets,
                                             top_targets, score)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
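
The same top-level entry point covers linear regressors; a sketch under the same assumptions (the dataset choice is arbitrary):

import eli5
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

X, y = load_diabetes(return_X_y=True)
reg = LinearRegression().fit(X, y)

# For a linear model each feature weight is coef_[i] * x[i], and
# '<BIAS>' is the intercept, so the weights sum to the prediction.
expl = eli5.explain_prediction(reg, X[0])
print(eli5.format_as_text(expl))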
Code example #3
File: explain_prediction.py  Project: zanachka/eli5
def explain_prediction_tree_classifier(clf,
                                       doc,
                                       vec=None,
                                       top=None,
                                       top_targets=None,
                                       target_names=None,
                                       targets=None,
                                       feature_names=None,
                                       feature_re=None,
                                       feature_filter=None,
                                       vectorized=False):
    """ Explain prediction of a tree classifier.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    classifier. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble (or a single tree for DecisionTreeClassifier).
    Each node of the tree has an output score, and contribution of a feature
    on the decision path is how much the score changes from parent to child.
    Weights of all features sum to the output score or proba of the estimator.
    """
    vec, feature_names = handle_vec(clf, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Tree estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    proba = predict_proba(clf, X)
    if hasattr(clf, 'decision_function'):
        score, = clf.decision_function(X)
    else:
        score = None

    is_multiclass = clf.n_classes_ > 2
    feature_weights = _trees_feature_weights(clf, X, feature_names,
                                             clf.n_classes_)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def _weights(label_id, scale=1.0):
        weights = feature_weights[:, label_id]
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    res = Explanation(
        estimator=repr(clf),
        method='decision path',
        targets=[],
        description=(DESCRIPTION_TREE_CLF_MULTICLASS
                     if is_multiclass else DESCRIPTION_TREE_CLF_BINARY),
    )
    assert res.targets is not None

    display_names = get_target_display_names(
        clf.classes_,
        target_names,
        targets,
        top_targets,
        score=score if score is not None else proba)

    if is_multiclass:
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id] if score is not None else None,
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target, scale, label_id = get_binary_target_scale_label_id(
            score, display_names, proba)
        target_expl = TargetExplanation(
            target=target,
            feature_weights=_weights(label_id, scale=scale),
            score=score if score is not None else None,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
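
A sketch of the multiclass case with an ensemble (iris and the forest size are illustrative; eli5 routes forests through this same tree-classifier explainer):

import eli5
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(n_estimators=20).fit(X, y)

# One TargetExplanation per class; forests have no decision_function,
# so for each class the weights sum to its predicted probability.
expl = eli5.explain_prediction(clf, X[0], top=5)
print(eli5.format_as_text(expl))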
Code example #4
File: explain_prediction.py  Project: zanachka/eli5
def explain_prediction_linear_classifier(
    clf,
    doc,
    vec=None,
    top=None,
    top_targets=None,
    target_names=None,
    targets=None,
    feature_names=None,
    feature_re=None,
    feature_filter=None,
    vectorized=False,
):
    """
    Explain prediction of a linear classifier.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    classifier. Set it to True if you're passing ``vec``, but ``doc``
    is already vectorized.
    """
    vec, feature_names = handle_vec(clf, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    proba = predict_proba(clf, X)
    score, = clf.decision_function(X)

    if has_intercept(clf):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )
    assert res.targets is not None

    _weights = _linear_weights(clf, x, top, feature_names, flt_indices)
    classes = getattr(clf, "classes_", ["-1", "1"])  # OneClassSVM support
    display_names = get_target_display_names(classes, target_names, targets,
                                             top_targets, score)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        if len(display_names) == 1:  # target is passed explicitly
            label_id, target = display_names[0]
        else:
            label_id = 1 if score >= 0 else 0
            target = display_names[label_id][1]
        scale = -1 if label_id == 0 else 1

        target_expl = TargetExplanation(
            target=target,
            feature_weights=_weights(0, scale=scale),
            score=score,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
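
A sketch of the two ways to pass ``doc`` when a vectorizer is involved (the toy corpus and labels are made up):

import eli5
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

docs = ['good movie', 'bad movie', 'great film', 'awful film']
vec = CountVectorizer()
X = vec.fit_transform(docs)
clf = LogisticRegression().fit(X, [1, 0, 1, 0])

# Raw document: vec.transform([doc]) is applied internally.
expl = eli5.explain_prediction(clf, 'good film', vec=vec)

# Pre-vectorized document: vectorized=True means vec is used only for
# feature names, and doc is passed to the classifier as-is.
expl_vec = eli5.explain_prediction(clf, X[0], vec=vec, vectorized=True)
print(eli5.format_as_text(expl))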
Code example #5
File: xgboost.py  Project: gth158a/eli5
def explain_prediction_xgboost(
    xgb,
    doc,
    vec=None,
    top=None,
    top_targets=None,
    target_names=None,
    targets=None,
    feature_names=None,
    feature_re=None,
    feature_filter=None,
    vectorized=False,
):
    """ Return an explanation of XGBoost prediction (via scikit-learn wrapper
    XGBClassifier or XGBRegressor) as feature weights.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator ``xgb``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    estimator. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble.
    Each leaf has an output score, and expected scores can also be assigned
    to parent nodes.
    Contribution of one feature on the decision path is how much expected score
    changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """
    xgb_feature_names = xgb.booster().feature_names
    vec, feature_names = handle_vec(xgb,
                                    doc,
                                    vec,
                                    vectorized,
                                    feature_names,
                                    num_features=len(xgb_feature_names))
    if feature_names.bias_name is None:
        # XGBoost estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    X = get_X(doc, vec, vectorized=vectorized)
    if sp.issparse(X):
        # Work around XGBoost issue:
        # https://github.com/dmlc/xgboost/issues/1238#issuecomment-243872543
        X = X.tocsc()

    proba = predict_proba(xgb, X)
    scores_weights = _prediction_feature_weights(xgb, X, feature_names,
                                                 xgb_feature_names)

    x = get_X0(add_intercept(X))
    x = _missing_values_set_to_nan(x, xgb.missing, sparse_missing=True)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    is_multiclass = _xgb_n_targets(xgb) > 1
    is_regression = isinstance(xgb, XGBRegressor)
    names = xgb.classes_ if not is_regression else ['y']

    def get_score_feature_weights(_label_id):
        _score, _feature_weights = scores_weights[_label_id]
        _x = x
        if flt_indices is not None:
            _x = mask(_x, flt_indices)
            _feature_weights = mask(_feature_weights, flt_indices)
        return _score, get_top_features(feature_names, _feature_weights, top,
                                        _x)

    return get_decision_path_explanation(
        xgb,
        doc,
        vec,
        vectorized=vectorized,
        original_display_names=names,
        target_names=target_names,
        targets=targets,
        top_targets=top_targets,
        is_regression=is_regression,
        is_multiclass=is_multiclass,
        proba=proba,
        get_score_feature_weights=get_score_feature_weights,
    )
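
Note that this snippet predates an xgboost API rename: it calls ``xgb.booster()``, which newer xgboost releases expose as ``get_booster()``. A usage sketch, assuming an eli5/xgboost pairing compatible with this code (dataset and hyperparameters are illustrative):

import eli5
from sklearn.datasets import load_iris
from xgboost import XGBClassifier

X, y = load_iris(return_X_y=True)
xgb = XGBClassifier(n_estimators=10).fit(X, y)

# Dispatches to explain_prediction_xgboost via the sklearn wrapper.
expl = eli5.explain_prediction(xgb, X[0], top=5)
print(eli5.format_as_text(expl))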
Code example #6
File: lightgbm.py  Project: qh582/eli5
def explain_prediction_lightgbm(
        lgb, doc,
        vec=None,
        top=None,
        top_targets=None,
        target_names=None,
        targets=None,
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        vectorized=False,
        ):
    """ Return an explanation of LightGBM prediction (via scikit-learn wrapper
    LGBMClassifier or LGBMRegressor, or via lightgbm.Booster) as feature weights.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator ``lgb``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    estimator. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble.
    Each leaf has an output score, and expected scores can also be assigned
    to parent nodes.
    Contribution of one feature on the decision path is how much expected score
    changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """

    booster, is_regression = _check_booster_args(lgb)
    lgb_feature_names = booster.feature_name()
    vec, feature_names = handle_vec(lgb, doc, vec, vectorized, feature_names,
        num_features=len(lgb_feature_names))
    if feature_names.bias_name is None:
        # LightGBM estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'
    X = get_X(doc, vec, vectorized=vectorized)
    
    if isinstance(lgb, lightgbm.Booster):
        prediction = lgb.predict(X)
        n_targets = prediction.shape[-1]
        if is_regression is None and target_names is None:
            # When n_targets is 1, this could be classification as well,
            # but it is safer to assume regression in that case, unless
            # the user marks the problem as classification by passing
            # target_names (e.g. [0, 1]).
            # If n_targets > 1, it must be classification.
            is_regression = n_targets == 1
        elif is_regression is None:
            is_regression = len(target_names) == 1 and n_targets == 1

        if is_regression:
            proba = None
        else:
            if n_targets == 1:
                p, = prediction
                proba = np.array([1 - p, p])
            else:
                proba, = prediction
    else:
        proba = predict_proba(lgb, X)
        n_targets = _lgb_n_targets(lgb)

    if is_regression:
        names = ['y']
    elif isinstance(lgb, lightgbm.Booster):
        names = np.arange(max(2, n_targets))
    else:
        names = lgb.classes_

    weight_dicts = _get_prediction_feature_weights(booster, X, n_targets)
    x = get_X0(add_intercept(X))

    def get_score_weights(_label_id):
        _weights = _target_feature_weights(
            weight_dicts[_label_id],
            num_features=len(feature_names),
            bias_idx=feature_names.bias_idx,
        )
        _score = _get_score(weight_dicts[_label_id])
        return _score, _weights

    return get_decision_path_explanation(
        lgb, doc, vec,
        x=x,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        vectorized=vectorized,
        original_display_names=names,
        target_names=target_names,
        targets=targets,
        top_targets=top_targets,
        is_regression=is_regression,
        is_multiclass=n_targets > 1,
        proba=proba,
        get_score_weights=get_score_weights,
    )
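
This variant also accepts a raw ``lightgbm.Booster``. Since a single-output Booster is ambiguous, passing ``target_names`` marks it as binary classification, as the comments above describe; a sketch assuming compatible lightgbm/eli5 versions (dataset and parameters are illustrative):

import eli5
import lightgbm
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)
booster = lightgbm.train({'objective': 'binary', 'verbose': -1},
                         lightgbm.Dataset(X, label=y),
                         num_boost_round=20)

# Without target_names, a single-output Booster is treated as regression;
# passing two names flags it as binary classification instead.
expl = eli5.explain_prediction(booster, X[0], target_names=[0, 1])
print(eli5.format_as_text(expl))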
Code example #7
File: xgboost.py  Project: phillipparr/NER
def explain_prediction_xgboost(
        xgb,
        doc,
        vec=None,
        top=None,
        top_targets=None,
        target_names=None,
        targets=None,
        feature_names=None,
        feature_re=None,  # type: Pattern[str]
        feature_filter=None,
        vectorized=False,  # type: bool
        is_regression=None,  # type: bool
        missing=None,  # type: bool
):
    """ Return an explanation of XGBoost prediction (via scikit-learn wrapper
    XGBClassifier or XGBRegressor, or via xgboost.Booster) as feature weights.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    Parameters
    ----------
    vec : vectorizer, optional
        A vectorizer instance used to transform
        raw features to the input of the estimator ``xgb``
        (e.g. a fitted CountVectorizer instance); you can pass it
        instead of ``feature_names``.

    vectorized : bool, optional
        A flag which tells eli5 if ``doc`` should be
        passed through ``vec`` or not. By default it is False, meaning that
        if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
        estimator. Set it to True if you're passing ``vec``,
        but ``doc`` is already vectorized.

    is_regression : bool, optional
        Pass if an ``xgboost.Booster`` is passed as the first argument.
        True if solving a regression problem ("objective" starts with "reg")
        and False for a classification problem.
        If not set, regression is assumed for a single target estimator
        and proba will not be shown.

    missing : optional
        Pass if an ``xgboost.Booster`` is passed as the first argument.
        Set it to the same value as the ``missing`` argument to
        ``xgboost.DMatrix``.
        Matters only if sparse values are used. Default is ``np.nan``.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble.
    Each leaf has an output score, and expected scores can also be assigned
    to parent nodes.
    Contribution of one feature on the decision path is how much expected score
    changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """
    booster, is_regression = _check_booster_args(xgb, is_regression)
    xgb_feature_names = booster.feature_names
    vec, feature_names = handle_vec(xgb,
                                    doc,
                                    vec,
                                    vectorized,
                                    feature_names,
                                    num_features=len(xgb_feature_names))
    if feature_names.bias_name is None:
        # XGBoost estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    X = get_X(doc, vec, vectorized=vectorized)
    if sp.issparse(X):
        # Work around XGBoost issue:
        # https://github.com/dmlc/xgboost/issues/1238#issuecomment-243872543
        X = X.tocsc()

    if missing is None:
        missing = np.nan if isinstance(xgb, Booster) else xgb.missing
    dmatrix = DMatrix(X, missing=missing)

    if isinstance(xgb, Booster):
        prediction = xgb.predict(dmatrix)
        n_targets = prediction.shape[-1]  # type: int
        if is_regression is None:
            # When n_targets is 1, this can be classification too,
            # but it's safer to assume regression.
            # If n_targets > 1, it must be classification.
            is_regression = n_targets == 1
        if is_regression:
            proba = None
        else:
            if n_targets == 1:
                p, = prediction
                proba = np.array([1 - p, p])
            else:
                proba, = prediction
    else:
        proba = predict_proba(xgb, X)
        n_targets = _xgb_n_targets(xgb)

    if is_regression:
        names = ['y']
    elif isinstance(xgb, Booster):
        names = np.arange(max(2, n_targets))
    else:
        names = xgb.classes_

    scores_weights = _prediction_feature_weights(booster, dmatrix, n_targets,
                                                 feature_names,
                                                 xgb_feature_names)

    x = get_X0(add_intercept(X))
    x = _missing_values_set_to_nan(x, missing, sparse_missing=True)

    return get_decision_path_explanation(
        xgb,
        doc,
        vec,
        x=x,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        vectorized=vectorized,
        original_display_names=names,
        target_names=target_names,
        targets=targets,
        top_targets=top_targets,
        is_regression=is_regression,
        is_multiclass=n_targets > 1,
        proba=proba,
        get_score_weights=lambda label_id: scores_weights[label_id],
    )
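
A sketch for the raw ``xgboost.Booster`` path, which needs ``is_regression`` spelled out (the objective name ``reg:squarederror`` is the modern spelling; older xgboost releases use ``reg:linear``):

import eli5
import numpy as np
import xgboost
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
booster = xgboost.train({'objective': 'reg:squarederror'},
                        xgboost.DMatrix(X, label=y),
                        num_boost_round=10)

# A raw Booster carries no regression/classification metadata, so state
# it explicitly; 'missing' should match the value used for the DMatrix.
expl = eli5.explain_prediction(booster, X[0], is_regression=True,
                               missing=np.nan)
print(eli5.format_as_text(expl))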
Code example #8
def explain_prediction_lightgbm(
    lgb,
    doc,
    vec=None,
    top=None,
    top_targets=None,
    target_names=None,
    targets=None,
    feature_names=None,
    feature_re=None,
    feature_filter=None,
    vectorized=False,
):
    """ Return an explanation of LightGBM prediction (via scikit-learn wrapper
    LGBMClassifier or LGBMRegressor) as feature weights.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator ``lgb``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    estimator. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble.
    Each leaf has an output score, and expected scores can also be assigned
    to parent nodes.
    Contribution of one feature on the decision path is how much expected score
    changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """

    vec, feature_names = handle_vec(lgb, doc, vec, vectorized, feature_names)
    if feature_names.bias_name is None:
        # LightGBM estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'
    X = get_X(doc, vec, vectorized=vectorized)

    proba = predict_proba(lgb, X)
    weight_dicts = _get_prediction_feature_weights(lgb, X, _lgb_n_targets(lgb))
    x = get_X0(add_intercept(X))

    is_regression = isinstance(lgb, lightgbm.LGBMRegressor)
    is_multiclass = _lgb_n_targets(lgb) > 2
    names = lgb.classes_ if not is_regression else ['y']

    def get_score_weights(_label_id):
        _weights = _target_feature_weights(
            weight_dicts[_label_id],
            num_features=len(feature_names),
            bias_idx=feature_names.bias_idx,
        )
        _score = _get_score(weight_dicts[_label_id])
        return _score, _weights

    return get_decision_path_explanation(
        lgb,
        doc,
        vec,
        x=x,
        feature_names=feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        top=top,
        vectorized=vectorized,
        original_display_names=names,
        target_names=target_names,
        targets=targets,
        top_targets=top_targets,
        is_regression=is_regression,
        is_multiclass=is_multiclass,
        proba=proba,
        get_score_weights=get_score_weights,
    )
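
A sketch for the scikit-learn wrapper path (dataset and number of trees are illustrative):

import eli5
from lightgbm import LGBMRegressor
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = LGBMRegressor(n_estimators=20).fit(X, y)

# Regression: a single 'y' target; the weights (including '<BIAS>')
# sum to the raw predicted score.
expl = eli5.explain_prediction(reg, X[0])
print(eli5.format_as_text(expl))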