コード例 #1
0
ファイル: test_sklearn_utils.py プロジェクト: zevcc-gh/eli5
def test_get_default_target_names():
    """Check default target names for single- and two-target regressors."""
    # (estimator, number of regression targets, expected default names)
    cases = [
        (SGDRegressor(**SGD_KWARGS), 1, {'y'}),
        (ElasticNet(), 2, {'y0', 'y1'}),
    ]
    for reg, n_targets, expected_names in cases:
        X, y = make_regression(n_targets=n_targets, n_features=3)
        reg.fit(X, y)
        assert set(get_default_target_names(reg)) == expected_names
コード例 #2
0
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        vectorized=False):
    """
    Explain a single prediction of the linear regressor ``reg`` for ``doc``.

    Returns an ``Explanation`` (``method='linear model'``,
    ``is_regression=True``) holding one ``TargetExplanation`` per displayed
    target. Feature weights of a target are the top features (limited by
    ``top``) of ``_multiply(x, coef)``, where ``x`` is the single input row
    (extended via ``_add_intercept`` when ``has_intercept(reg)`` is true)
    and ``coef`` comes from ``get_coef(reg, label_id)``.

    ``vec``, ``vectorized`` and ``feature_names`` are forwarded to the
    ``_handle_vec`` / ``_get_X`` helpers; ``target_names`` and ``targets``
    are forwarded to ``get_display_names``.
    """
    vec, feature_names = _handle_vec(reg, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    # Exactly one prediction row is expected; unpacking fails otherwise.
    [score] = reg.predict(X)

    if has_intercept(reg):
        X = _add_intercept(X)
    [x] = X

    explanation = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )

    def _append_target(label_id, label, target_score):
        # Build the explanation of one target and attach it to the result.
        contributions = _multiply(x, get_coef(reg, label_id))
        target_expl = TargetExplanation(
            target=label,
            feature_weights=get_top_features(
                feature_names, contributions, top),
            score=target_score,
        )
        _add_weighted_spans(doc, vec, target_expl)
        explanation.targets.append(target_expl)

    display_names = get_display_names(
        get_default_target_names(reg), target_names, targets)

    if not is_multitarget_regressor(reg):
        # Single target: the prediction itself is the score.
        _append_target(0, display_names[0][1], score)
        return explanation

    for label_id, label in display_names:
        _append_target(label_id, label, score[label_id])
    return explanation
コード例 #3
0
ファイル: explain_prediction.py プロジェクト: zanachka/eli5
def explain_prediction_tree_regressor(reg,
                                      doc,
                                      vec=None,
                                      top=None,
                                      top_targets=None,
                                      target_names=None,
                                      targets=None,
                                      feature_names=None,
                                      feature_re=None,
                                      feature_filter=None,
                                      vectorized=False):
    """ Explain a single prediction of a tree regressor.

    The ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters
    are described in :func:`eli5.explain_prediction`.

    ``vec`` is a vectorizer instance (e.g. a fitted CountVectorizer)
    that turns raw features into the input of the regressor ``reg``;
    it can be passed instead of ``feature_names``.

    ``vectorized`` tells eli5 whether ``doc`` still has to go through
    ``vec``. It is False by default, so when ``vec`` is not None,
    ``vec.transform([doc])`` is what the regressor receives. Pass True
    if you supply ``vec`` but ``doc`` has already been vectorized.

    Feature importances are determined following the idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Decision paths are followed in the trees of an ensemble (or in the
    single tree of a DecisionTreeRegressor). Every tree node has an output
    score, and the contribution of a feature on the decision path is the
    change of that score from parent to child.
    Weights of all features add up to the output score of the estimator.
    """
    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Tree estimators have no intercept of their own, but they are
        # interpreted here as if they had one.
        feature_names.bias_name = '<BIAS>'

    # Exactly one prediction row is expected; unpacking fails otherwise.
    [score] = reg.predict(X)
    num_targets = getattr(reg, 'n_outputs_', 1)
    is_multitarget = num_targets > 1
    feature_weights = _trees_feature_weights(
        reg, X, feature_names, num_targets)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    if is_multitarget:
        description = DESCRIPTION_TREE_REG_MULTITARGET
    else:
        description = DESCRIPTION_TREE_REG
    res = Explanation(
        estimator=repr(reg),
        method='decision path',
        description=description,
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    def _append_target(label_id, label, target_score):
        # Build the explanation of one target and attach it to the result.
        target_expl = TargetExplanation(
            target=label,
            feature_weights=get_top_features_filtered(
                x, flt_feature_names, flt_indices,
                feature_weights[:, label_id], top, 1.0),
            score=target_score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    display_names = get_target_display_names(
        get_default_target_names(reg, num_targets=num_targets),
        target_names, targets, top_targets, score)

    if not is_multitarget:
        # Single target: the prediction itself is the score.
        _append_target(0, display_names[0][1], score)
        return res

    for label_id, label in display_names:
        _append_target(label_id, label, score[label_id])
    return res
コード例 #4
0
ファイル: explain_prediction.py プロジェクト: zanachka/eli5
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        top_targets=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        feature_re=None,
                                        feature_filter=None,
                                        vectorized=False):
    """
    Explain a single prediction of a linear regressor.

    The ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters
    are described in :func:`eli5.explain_prediction`.

    ``vec`` is a vectorizer instance that turns raw features into
    the input of the regressor ``reg``; it can be passed instead of
    ``feature_names``.

    ``vectorized`` tells eli5 whether ``doc`` still has to go through
    ``vec``. It is False by default, so when ``vec`` is not None,
    ``vec.transform([doc])`` is what the regressor ``reg`` receives.
    Pass True if you supply ``vec`` but ``doc`` has already been vectorized.

    SVR / NuSVR estimators with a non-linear kernel are delegated to
    ``explain_prediction_sklearn_not_supported``.
    """
    if isinstance(reg, (SVR, NuSVR)) and reg.kernel != 'linear':
        return explain_prediction_sklearn_not_supported(reg, doc)

    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    # Exactly one prediction row is expected; unpacking fails otherwise.
    [score] = reg.predict(X)

    if has_intercept(reg):
        X = add_intercept(X)
    x = get_X0(X)

    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    weights_for = _linear_weights(reg, x, top, flt_feature_names, flt_indices)

    def _append_target(label_id, label, target_score):
        # Build the explanation of one target and attach it to the result.
        target_expl = TargetExplanation(
            target=label,
            feature_weights=weights_for(label_id),
            score=target_score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets,
        top_targets, score)

    if not is_multitarget_regressor(reg):
        # Single target: the prediction itself is the score.
        _append_target(0, display_names[0][1], score)
        return res

    for label_id, label in display_names:
        _append_target(label_id, label, score[label_id])
    return res
コード例 #5
0
def explain_linear_regressor_weights(
    reg,
    vec=None,
    top=_TOP,
    target_names=None,
    targets=None,
    feature_names=None,
    coef_scale=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Build an explanation of the weights of a linear regressor.

    The ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters are described in
    :func:`eli5.explain_weights`.

    ``vec`` is a vectorizer instance that turns raw features into
    the input of the regressor ``reg``; it can be passed instead of
    ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray holding one scaling coefficient
    per feature; coef[i] = coef[i] * coef_scale[i] whenever
    coef_scale[i] is not nan. It lets you scale coefficients before they
    are displayed, to account for the sign or scale of input features.
    """
    feature_names, coef_scale = handle_hashing_vec(
        vec, feature_names, coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)

    if is_invhashing(vec):
        caveats = "\n" + HASHING_CAVEATS
    else:
        caveats = ''

    def _top_features(target_id):
        # Top coefficients of one target, restricted to the kept features.
        coef = get_coef(reg, target_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets)

    if is_multitarget_regressor(reg):
        target_explanations = [
            TargetExplanation(target=target_name,
                              feature_weights=_top_features(target_id))
            for target_id, target_name in display_names
        ]
        description = DESCRIPTION_REGRESSION_MULTITARGET + caveats
    else:
        target_explanations = [
            TargetExplanation(
                target=display_names[0][1],
                feature_weights=_top_features(0),
            )
        ]
        description = DESCRIPTION_REGRESSION + caveats

    return Explanation(
        targets=target_explanations,
        description=description,
        estimator=repr(reg),
        method='linear model',
        is_regression=True,
    )
コード例 #6
0
ファイル: explain_weights.py プロジェクト: woshahua/eli5
def explain_linear_regressor_weights(reg,
                                     vec=None,
                                     top=_TOP,
                                     target_names=None,
                                     targets=None,
                                     feature_names=None,
                                     coef_scale=None,
                                     feature_re=None):
    """
    Build an explanation of the weights of a linear regressor.
    The result has the following format::

        Explanation(
            estimator="<regressor repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<target name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # A number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    Use the utilities from eli5.formatters to print it.
    """
    feature_names, coef_scale = handle_hashing_vec(
        vec, feature_names, coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    if feature_re is not None:
        # flt_indices is only bound (and only read) when a regex is given.
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)

    if is_invhashing(vec):
        caveats = "\n" + HASHING_CAVEATS
    else:
        caveats = ''

    def _top_features(target_id):
        # Top coefficients of one target, restricted to the kept features.
        coef = get_coef(reg, target_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(
        get_default_target_names(reg), target_names, targets)

    if is_multitarget_regressor(reg):
        target_explanations = [
            TargetExplanation(target=target_name,
                              feature_weights=_top_features(target_id))
            for target_id, target_name in display_names
        ]
        description = DESCRIPTION_REGRESSION_MULTITARGET + caveats
    else:
        target_explanations = [
            TargetExplanation(
                target=display_names[0][1],
                feature_weights=_top_features(0),
            )
        ]
        description = DESCRIPTION_REGRESSION + caveats

    return Explanation(
        targets=target_explanations,
        description=description,
        estimator=repr(reg),
        method='linear model',
        is_regression=True,
    )