Example #1
def explain_prediction_libsvm_linear(clf, doc, *args, **kwargs):
    if clf.kernel != 'linear':
        return Explanation(
            estimator=repr(clf),
            error="only kernel='linear' is currently supported for "
            "libsvm-based classifiers",
        )
    if len(getattr(clf, 'classes_', [])) > 2:
        return Explanation(
            estimator=repr(clf),
            error="only binary libsvm-based classifiers are supported",
        )
    return explain_prediction_linear_classifier(clf, doc, *args, **kwargs)
Example #2
def test_transition_features():
    expl = Explanation(
        estimator='some estimator',
        targets=[
            TargetExplanation('class1',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('pos', 13, value=1)],
                                  neg=[],
                              )),
            TargetExplanation('class2',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('pos', 13, value=1)],
                                  neg=[],
                              )),
        ],
        transition_features=TransitionFeatureWeights(
            class_names=['class2', 'class1'],  # reverse on purpose
            coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
        ))
    df_dict = format_as_dataframes(expl)
    assert isinstance(df_dict, dict)
    assert set(df_dict) == {'targets', 'transition_features'}
    assert df_dict['targets'].equals(format_as_dataframe(expl.targets))
    df = df_dict['transition_features']
    print(df)
    print(format_as_text(expl))
    assert str(df) == ('to      class2  class1\n'
                       'from                  \n'
                       'class2     1.5     2.5\n'
                       'class1     3.5     4.5')

    with pytest.warns(UserWarning):
        single_df = format_as_dataframe(expl)
    assert single_df.equals(df)
Example #3
def test_feature_importances(with_std, with_value):
    expl = Explanation(estimator='some estimator',
                       feature_importances=FeatureImportances(
                           importances=[
                               FeatureWeight('a',
                                             1,
                                             std=0.1 if with_std else None,
                                             value=1 if with_value else None),
                               FeatureWeight('b',
                                             2,
                                             std=0.2 if with_std else None,
                                             value=3 if with_value else None),
                           ],
                           remaining=10,
                       ))
    df_dict = format_as_dataframes(expl)
    assert isinstance(df_dict, dict)
    assert list(df_dict) == ['feature_importances']
    df = df_dict['feature_importances']
    expected_df = pd.DataFrame({'weight': [1, 2]}, index=['a', 'b'])
    if with_std:
        expected_df['std'] = [0.1, 0.2]
    if with_value:
        expected_df['value'] = [1, 3]
    print(df, expected_df, sep='\n')
    assert expected_df.equals(df)

    single_df = format_as_dataframe(expl)
    assert expected_df.equals(single_df)
Example #4
def test_targets_with_value():
    expl = Explanation(
        estimator='some estimator',
        targets=[
            TargetExplanation('y',
                              feature_weights=FeatureWeights(
                                  pos=[
                                      FeatureWeight('a', 13, value=1),
                                      FeatureWeight('b', 5, value=2)
                                  ],
                                  neg=[
                                      FeatureWeight('neg1', -10, value=3),
                                      FeatureWeight('neg2', -1, value=4)
                                  ],
                              )),
            TargetExplanation('y2',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('f', 1, value=5)],
                                  neg=[],
                              )),
        ],
    )
    df = format_as_dataframe(expl)
    expected_df = pd.DataFrame(
        {
            'weight': [13, 5, -1, -10, 1],
            'value': [1, 2, 4, 3, 5]
        },
        columns=['weight', 'value'],
        index=pd.MultiIndex.from_tuples([('y', 'a'), ('y', 'b'), ('y', 'neg2'),
                                         ('y', 'neg1'), ('y2', 'f')],
                                        names=['target', 'feature']))
    print(df, expected_df, sep='\n')
    assert expected_df.equals(df)
Example #5
def get_feature_importance_explanation(estimator,
                                       vec,
                                       coef,
                                       feature_names,
                                       feature_filter,
                                       feature_re,
                                       top,
                                       description,
                                       is_regression,
                                       estimator_feature_names=None,
                                       num_features=None,
                                       coef_std=None):
    # type: (...) -> Explanation
    feature_names, flt_indices = get_feature_names_filtered(
        estimator,
        vec,
        feature_names=feature_names,
        estimator_feature_names=estimator_feature_names,
        feature_filter=feature_filter,
        feature_re=feature_re,
        num_features=num_features,
    )
    feature_importances = get_feature_importances_filtered(
        coef, feature_names, flt_indices, top, coef_std)
    return Explanation(
        feature_importances=feature_importances,
        description=description,
        estimator=repr(estimator),
        method='feature importances',
        is_regression=is_regression,
    )
Example #6
def get_decision_path_explanation(estimator, doc, vec, vectorized,
                                  x, feature_names,
                                  feature_filter, feature_re, top,
                                  original_display_names,
                                  target_names, targets, top_targets,
                                  is_regression, is_multiclass, proba,
                                  get_score_weights):
    # type: (...) -> Explanation

    display_names = get_target_display_names(
        original_display_names, target_names, targets, top_targets, proba)
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def get_top_features(weights, scale=1.0):
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    explanation = Explanation(
        estimator=repr(estimator),
        method='decision paths',
        description={
            (False, False): DESCRIPTION_CLF_BINARY,
            (False, True): DESCRIPTION_CLF_MULTICLASS,
            (True, False): DESCRIPTION_REGRESSION,
        }[is_regression, is_multiclass],
        is_regression=is_regression,
        targets=[],
    )
    assert explanation.targets is not None

    if is_multiclass:
        for label_id, label in display_names:
            score, all_feature_weights = get_score_weights(label_id)
            target_expl = TargetExplanation(
                target=label,
                feature_weights=get_top_features(all_feature_weights),
                score=score,
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            explanation.targets.append(target_expl)
    else:
        score, all_feature_weights = get_score_weights(0)
        if is_regression:
            target, scale, label_id = display_names[-1][1], 1.0, 1
        else:
            target, scale, label_id = get_binary_target_scale_label_id(
                score, display_names, proba)

        target_expl = TargetExplanation(
            target=target,
            feature_weights=get_top_features(all_feature_weights, scale),
            score=score,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        explanation.targets.append(target_expl)

    return explanation
Example #7
def explain_prediction_linear_classifier(clf,
                                         doc,
                                         vec=None,
                                         top=None,
                                         target_names=None,
                                         targets=None,
                                         feature_names=None,
                                         vectorized=False):
    """ Explain prediction of a linear classifier. """
    vec, feature_names = _handle_vec(clf, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    if is_probabilistic_classifier(clf):
        try:
            proba, = clf.predict_proba(X)
        except NotImplementedError:
            proba = None
    else:
        proba = None
    score, = clf.decision_function(X)

    if has_intercept(clf):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )

    def _weights(label_id):
        coef = get_coef(clf, label_id)
        scores = _multiply(x, coef)
        return get_top_features(feature_names, scores, top)

    display_names = get_display_names(clf.classes_, target_names, targets)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[1][1],
            feature_weights=_weights(0),
            score=score,
            proba=proba[1] if proba is not None else None,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
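A minimal usage sketch (not from the original source) for the linear-classifier path above, assuming the public entry points eli5.explain_prediction and eli5.format_as_text; the toy corpus and the LogisticRegression setup are illustrative.

# Hedged sketch: per-feature contributions (coefficient * feature value, plus
# the <BIAS> term) for a single document, as produced by
# explain_prediction_linear_classifier via the eli5.explain_prediction dispatcher.
import eli5
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

texts = ['good movie', 'bad movie', 'good plot', 'bad plot']
labels = [1, 0, 1, 0]
vec = CountVectorizer()
clf = LogisticRegression().fit(vec.fit_transform(texts), labels)

expl = eli5.explain_prediction(clf, 'a good movie', vec=vec)
print(eli5.format_as_text(expl))  # score, probability and weighted features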
Example #8
def explain_weights_lightning_not_supported(
        estimator, vec=None, top=20, target_names=None,
        targets=None, feature_names=None,
        coef_scale=None):
    return Explanation(
        estimator=repr(estimator),
        error="Error: estimator %r is not supported" % estimator,
    )
Example #9
def explain_prediction_lightning_not_supported(
        estimator, doc, vec=None, top=None,
        target_names=None, targets=None,
        feature_names=None, vectorized=False,
        coef_scale=None):
    return Explanation(
        estimator=repr(estimator),
        error="Error: estimator %r is not supported" % estimator,
    )
Example #10
def explain_weights_sklearn_not_supported(
        estimator, vec=None, top=_TOP,
        target_names=None,
        targets=None,
        feature_names=None, coef_scale=None,
        feature_re=None, feature_filter=None):
    return Explanation(
        estimator=repr(estimator),
        error="estimator %r is not supported" % estimator,
    )
Example #11
def explain_prediction_keras_not_supported(model, doc):
    """
    Cannot do an explanation based on the passed arguments.
    Did you pass either "image" or "tokens"?
    """
    return Explanation(
        model.name,
        error='model "{}" is not supported, '
              'try passing the "image" argument if explaining an image model.'.format(model.name),
    )
Example #12
def test_targets(with_std, with_value):
    expl = Explanation(
        estimator='some estimator',
        targets=[
            TargetExplanation(
                'y',
                feature_weights=FeatureWeights(
                    pos=[
                        FeatureWeight('a',
                                      13,
                                      std=0.13 if with_std else None,
                                      value=2 if with_value else None),
                        FeatureWeight('b',
                                      5,
                                      std=0.5 if with_std else None,
                                      value=1 if with_value else None)
                    ],
                    neg=[
                        FeatureWeight('neg1',
                                      -10,
                                      std=0.2 if with_std else None,
                                      value=5 if with_value else None),
                        FeatureWeight('neg2',
                                      -1,
                                      std=0.3 if with_std else None,
                                      value=4 if with_value else None)
                    ],
                )),
            TargetExplanation('y2',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('f', 1)],
                                  neg=[],
                              )),
        ],
    )
    df_dict = format_as_dataframes(expl)
    assert isinstance(df_dict, dict)
    assert list(df_dict) == ['targets']
    df = df_dict['targets']
    expected_df = pd.DataFrame(
        {
            'target': ['y', 'y', 'y', 'y', 'y2'],
            'feature': ['a', 'b', 'neg2', 'neg1', 'f'],
            'weight': [13, 5, -1, -10, 1]
        },
        columns=['target', 'feature', 'weight'])
    if with_std:
        expected_df['std'] = [0.13, 0.5, 0.3, 0.2, None]
    if with_value:
        expected_df['value'] = [2, 1, 4, 5, None]
    print(df, expected_df, sep='\n')
    assert expected_df.equals(df)

    single_df = format_as_dataframe(expl)
    assert expected_df.equals(single_df)
Example #13
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    coef = estimator.feature_importances_
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]
    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
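A brief usage sketch (an assumption, not part of the source) for explain_decision_tree above via the eli5.explain_weights dispatcher; the iris data and the max_depth keyword (forwarded to sklearn.tree.export_graphviz) are illustrative.

# Hedged sketch: weights of a fitted DecisionTreeClassifier, with extra keyword
# arguments assumed to be passed through to export_graphviz.
import eli5
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target)

expl = eli5.explain_weights(clf, feature_names=iris.feature_names,
                            target_names=list(iris.target_names), max_depth=3)
print(eli5.format_as_text(expl))  # feature importances plus the tree structure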
Example #14
def test_format_as_dict():
    assert format_as_dict(
        Explanation(
            estimator='some estimator',
            targets=[
                TargetExplanation(
                    'y',
                    feature_weights=FeatureWeights(
                        pos=[FeatureWeight('a', np.float32(13.0))],
                        neg=[],
                    )),
            ],
        )) == {
            'estimator': 'some estimator',
            'targets': [
                {
                    'target': 'y',
                    'feature_weights': {
                        'pos': [{
                            'feature': 'a',
                            'weight': 13.0,
                            'std': None,
                            'value': None,
                        }],
                        'pos_remaining': 0,
                        'neg': [],
                        'neg_remaining': 0,
                    },
                    'score': None,
                    'proba': None,
                    'weighted_spans': None,
                    'heatmap': None,
                },
            ],
            'decision_tree': None,
            'description': None,
            'error': None,
            'feature_importances': None,
            'highlight_spaces': None,
            'is_regression': False,
            'method': None,
            'transition_features': None,
            'image': None,
        }
Example #15
def test_transition_features():
    expl = Explanation(
        estimator='some estimator',
        targets=[
            TargetExplanation('class1',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('pos', 13, value=1)],
                                  neg=[],
                              )),
            TargetExplanation('class2',
                              feature_weights=FeatureWeights(
                                  pos=[FeatureWeight('pos', 13, value=1)],
                                  neg=[],
                              )),
        ],
        transition_features=TransitionFeatureWeights(
            class_names=['class2', 'class1'],  # reverse on purpose
            coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
        ))
    df_dict = format_as_dataframes(expl)
    assert isinstance(df_dict, dict)
    assert set(df_dict) == {'targets', 'transition_features'}
    assert df_dict['targets'].equals(format_as_dataframe(expl.targets))
    df = df_dict['transition_features']
    print(df)
    print(format_as_text(expl))
    expected = pd.DataFrame([
        {
            'from': 'class2',
            'to': 'class2',
            'coef': 1.5
        },
        {
            'from': 'class2',
            'to': 'class1',
            'coef': 2.5
        },
        {
            'from': 'class1',
            'to': 'class2',
            'coef': 3.5
        },
        {
            'from': 'class1',
            'to': 'class1',
            'coef': 4.5
        },
    ],
                            columns=['from', 'to', 'coef'])
    assert df.equals(expected)
    with pytest.warns(UserWarning):
        single_df = format_as_dataframe(expl)
    assert single_df.equals(df)
Example #16
def explain_weights_xgboost(xgb,
                            vec=None,
                            top=20,
                            target_names=None,  # ignored
                            targets=None,  # ignored
                            feature_names=None,
                            feature_re=None,
                            feature_filter=None,
                            importance_type='gain',
                            ):
    """
    Return an explanation of an XGBoost estimator (via scikit-learn wrapper
    XGBClassifier or XGBRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'weight' - the number of times a feature is used to split the data
          across all trees
        - 'cover' - the average coverage of the feature when it is used in trees
    """
    coef = _xgb_feature_importances(xgb, importance_type=importance_type)
    num_features = coef.shape[-1]
    feature_names = get_feature_names(
        xgb, vec, feature_names=feature_names, num_features=num_features)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]

    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        description=DESCRIPTION_XGBOOST,
        estimator=repr(xgb),
        method='feature importances',
        is_regression=isinstance(xgb, XGBRegressor),
    )
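A hedged sketch (not from the source) comparing the importance_type values described in the docstring above; it assumes eli5.explain_weights forwards importance_type to explain_weights_xgboost, and the synthetic data is illustrative.

# Hedged sketch: 'gain', 'weight' and 'cover' importances for a fitted XGBClassifier.
import numpy as np
import eli5
from xgboost import XGBClassifier

rng = np.random.RandomState(0)
X = rng.rand(200, 4)
y = (X[:, 0] + X[:, 1] > 1).astype(int)
clf = XGBClassifier(n_estimators=50).fit(X, y)

for importance_type in ('gain', 'weight', 'cover'):
    expl = eli5.explain_weights(clf, importance_type=importance_type, top=4)
    print(importance_type)
    print(eli5.format_as_text(expl))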
Example #17
def explain_weights_lightning(estimator,
                              vec=None,
                              top=20,
                              target_names=None,
                              targets=None,
                              feature_names=None,
                              coef_scale=None):
    """ Return an explanation of a lightning estimator weights """
    return Explanation(
        estimator=repr(estimator),
        description="Error: estimator %r is not supported" % estimator,
    )
Example #18
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        vectorized=False):
    """ Explain prediction of a linear regressor. """
    vec, feature_names = _handle_vec(reg, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    score, = reg.predict(X)

    if has_intercept(reg):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )

    def _weights(label_id):
        coef = get_coef(reg, label_id)
        scores = _multiply(x, coef)
        return get_top_features(feature_names, scores, top)

    names = get_default_target_names(reg)
    display_names = get_display_names(names, target_names, targets)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
Example #19
def explain_weights_sklearn(estimator,
                            vec=None,
                            top=_TOP,
                            target_names=None,
                            targets=None,
                            feature_names=None,
                            coef_scale=None,
                            feature_re=None):
    """ Return an explanation of an estimator """
    return Explanation(
        estimator=repr(estimator),
        error="estimator %r is not supported" % estimator,
    )
Example #20
def explain_prediction_sklearn(estimator,
                               doc,
                               vec=None,
                               top=None,
                               target_names=None,
                               targets=None,
                               feature_names=None,
                               vectorized=False):
    """ Return an explanation of a scikit-learn estimator """
    return Explanation(
        estimator=repr(estimator),
        error="estimator %r is not supported" % estimator,
    )
Example #21
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    feature_importances = get_feature_importances_filtered(
        estimator.feature_importances_, feature_names, flt_indices, top)

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=feature_importances,
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
Example #22
def explain_weights_sklearn_crfsuite(crf,
                                     top=20,
                                     target_names=None,
                                     targets=None,
                                     feature_re=None,
                                     feature_filter=None):
    """ Explain sklearn_crfsuite.CRF weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``,
    ``feature_re`` and ``feature_filter`` parameters.
    """
    feature_names = np.array(crf.attributes_)
    state_coef = crf_state_coef(crf).todense().A
    transition_coef = crf_transition_coef(crf)

    if feature_filter is not None or feature_re is not None:
        state_feature_names, flt_indices = (
            FeatureNames(feature_names).handle_filter(feature_filter, feature_re))
        state_feature_names = np.array(state_feature_names.feature_names)
        state_coef = state_coef[:, flt_indices]
    else:
        state_feature_names = feature_names

    def _features(label_id):
        return get_top_features(state_feature_names, state_coef[label_id], top)

    if targets is None:
        targets = sorted_for_ner(crf.classes_)

    display_names = get_target_display_names(crf.classes_, target_names,
                                             targets)
    indices, names = zip(*display_names)
    transition_coef = filter_transition_coefs(transition_coef, indices)

    return Explanation(
        targets=[
            TargetExplanation(
                target=label,
                feature_weights=_features(label_id)
            )
            for label_id, label in zip(indices, names)
        ],
        transition_features=TransitionFeatureWeights(
            class_names=names,
            coef=transition_coef,
        ),
        estimator=repr(crf),
        method='CRF',
    )
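A minimal sketch (assumed setup, not from the source) of calling explain_weights_sklearn_crfsuite above through eli5.explain_weights; the tiny training sequence is illustrative only.

# Hedged sketch: state and transition weights of a fitted sklearn_crfsuite.CRF.
import eli5
import sklearn_crfsuite

X_train = [[{'word': 'Paris', 'is_upper': True},
            {'word': 'is', 'is_upper': False}]]
y_train = [['B-LOC', 'O']]
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', max_iterations=20)
crf.fit(X_train, y_train)

# The returned Explanation carries per-class feature weights (targets) and the
# label transition matrix (transition_features).
expl = eli5.explain_weights(crf, top=10)
print(eli5.format_as_text(expl))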
Example #23
def explain_prediction_lightning(estimator,
                                 doc,
                                 vec=None,
                                 top=None,
                                 target_names=None,
                                 targets=None,
                                 feature_names=None,
                                 vectorized=False,
                                 coef_scale=None):
    """ Return an explanation of a lightning estimator predictions """
    return Explanation(
        estimator=repr(estimator),
        description="Error: estimator %r is not supported" % estimator,
    )
Example #24
def explain_decision_tree(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree classifier in the
    following format (compatible with random forest explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )

    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    tree_feature_names = feature_names
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values = feature_names[indices], coef[indices]
    std = np.zeros_like(values)
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(clf,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=[
            FeatureWeight(*x) for x in zip(names, values, std)
        ],
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )
Example #25
def explain_prediction_sklearn_not_supported(estimator,
                                             doc,
                                             vec=None,
                                             top=None,
                                             top_targets=None,
                                             target_names=None,
                                             targets=None,
                                             feature_names=None,
                                             feature_re=None,
                                             feature_filter=None,
                                             vectorized=False):
    return Explanation(
        estimator=repr(estimator),
        error="estimator %r is not supported" % estimator,
    )
Example #26
def explain_rf_feature_importance(
    estimator,
    vec=None,
    top=_TOP,
    target_names=None,  # ignored
    targets=None,  # ignored
    feature_names=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a tree-based ensemble estimator.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    coef = estimator.feature_importances_
    trees = np.array(estimator.estimators_).ravel()
    coef_std = np.std([tree.feature_importances_ for tree in trees], axis=0)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]
        coef_std = coef_std[flt_indices]

    indices = argsort_k_largest_positive(coef, top)
    names, values, std = (feature_names[indices], coef[indices],
                          coef_std[indices])
    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values, std)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(estimator),
        method='feature importances',
    )
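A short sketch (assumed setup) for explain_rf_feature_importance above, rendered with the same format_as_dataframe helper used by the tests earlier in this listing; the synthetic data is illustrative.

# Hedged sketch: feature importances with per-tree std for a RandomForestClassifier,
# shown as a DataFrame with 'weight' and 'std' columns.
import numpy as np
import eli5
from eli5.formatters.as_dataframe import format_as_dataframe
from sklearn.ensemble import RandomForestClassifier

rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
clf = RandomForestClassifier(n_estimators=30, random_state=0).fit(X, y)

expl = eli5.explain_weights(clf, top=5)
print(format_as_dataframe(expl))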
Example #27
def get_decision_path_explanation(estimator, doc, vec, vectorized,
                                  original_display_names,
                                  target_names, targets, top_targets,
                                  is_regression, is_multiclass, proba,
                                  get_score_feature_weights):

    display_names = get_target_display_names(
        original_display_names, target_names, targets, top_targets, proba)

    explanation = Explanation(
        estimator=repr(estimator),
        method='decision paths',
        description={
            (False, False): DESCRIPTION_CLF_BINARY,
            (False, True): DESCRIPTION_CLF_MULTICLASS,
            (True, False): DESCRIPTION_REGRESSION,
        }[is_regression, is_multiclass],
        is_regression=is_regression,
        targets=[],
    )

    if is_multiclass:
        for label_id, label in display_names:
            score, feature_weights = get_score_feature_weights(label_id)
            target_expl = TargetExplanation(
                target=label,
                feature_weights=feature_weights,
                score=score,
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            explanation.targets.append(target_expl)
    else:
        score, feature_weights = get_score_feature_weights(0)
        target_expl = TargetExplanation(
            target=display_names[-1][1],
            feature_weights=feature_weights,
            score=score,
            proba=proba[1] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        explanation.targets.append(target_expl)

    return explanation
Example #28
def explain_rf_feature_importance(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,  # ignored
        targets=None,  # ignored
        feature_names=None,
        feature_re=None):
    """
    Return an explanation of a tree-based ensemble classifier in the
    following format::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )
    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    trees = np.array(clf.estimators_).ravel()
    coef_std = np.std([tree.feature_importances_ for tree in trees], axis=0)

    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
        coef_std = coef_std[flt_indices]

    indices = argsort_k_largest(coef, top)
    names, values, std = (feature_names[indices], coef[indices],
                          coef_std[indices])
    return Explanation(
        feature_importances=[
            FeatureWeight(*x) for x in zip(names, values, std)
        ],
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(clf),
        method='feature importances',
    )
Example #29
def explain_predictions(self, docs, top=30):
    if not isinstance(self.clf, XGBClassifier):
        raise NotImplementedError
    booster = self.clf.booster()
    xgb_feature_names = {f: i for i, f in enumerate(booster.feature_names)}
    feature_names = get_feature_names(self.clf,
                                      self.vec,
                                      num_features=len(xgb_feature_names))
    feature_names.bias_name = '<BIAS>'
    X = self.vec.transform(docs)
    X = X.tocsc()
    dmatrix = DMatrix(X, missing=self.clf.missing)
    leaf_ids = booster.predict(dmatrix, pred_leaf=True)
    tree_dumps = booster.get_dump(with_stats=True)
    docs_weights = []
    for i, _leaf_ids in enumerate(leaf_ids):
        all_weights = _target_feature_weights(
            _leaf_ids,
            tree_dumps,
            feature_names=feature_names,
            xgb_feature_names=xgb_feature_names)[1]
        weights = np.zeros_like(all_weights)
        idx = X[i].nonzero()[1]
        bias_idx = feature_names.bias_idx
        weights[idx] = all_weights[idx]
        weights[bias_idx] = all_weights[bias_idx]
        docs_weights.append(weights)
    weights = np.mean(docs_weights, axis=0)
    feature_weights = get_top_features(
        feature_names=np.array([_prettify_feature(f) for f in feature_names]),
        coef=weights,
        top=top)
    return Explanation(
        estimator=type(self.clf).__name__,
        targets=[TargetExplanation('y', feature_weights=feature_weights)],
    )
Example #30
def explain_prediction_tree_regressor(reg,
                                      doc,
                                      vec=None,
                                      top=None,
                                      top_targets=None,
                                      target_names=None,
                                      targets=None,
                                      feature_names=None,
                                      feature_re=None,
                                      feature_filter=None,
                                      vectorized=False):
    """ Explain prediction of a tree regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble (or a single tree for DecisionTreeRegressor).
    Each node of the tree has an output score, and contribution of a feature
    on the decision path is how much the score changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """
    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Tree estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    score, = reg.predict(X)
    num_targets = getattr(reg, 'n_outputs_', 1)
    is_multitarget = num_targets > 1
    feature_weights = _trees_feature_weights(reg, X, feature_names,
                                             num_targets)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def _weights(label_id, scale=1.0):
        weights = feature_weights[:, label_id]
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    res = Explanation(
        estimator=repr(reg),
        method='decision path',
        description=(DESCRIPTION_TREE_REG_MULTITARGET
                     if is_multitarget else DESCRIPTION_TREE_REG),
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    names = get_default_target_names(reg, num_targets=num_targets)
    display_names = get_target_display_names(names, target_names, targets,
                                             top_targets, score)

    if is_multitarget:
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
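A hedged sketch (not from the source) checking the property stated in the docstring above: decision-path weights, including the <BIAS> term, add up to the regressor's output score as long as ``top`` does not drop any features.

# Hedged sketch: decision-path contributions of a DecisionTreeRegressor prediction
# should sum (approximately) to the predicted value.
import numpy as np
import eli5
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = 2 * X[:, 0] - X[:, 1]
reg = DecisionTreeRegressor(max_depth=3, random_state=0).fit(X, y)

doc = X[0]
expl = eli5.explain_prediction(reg, doc)
target = expl.targets[0]
weights = [fw.weight
           for fw in target.feature_weights.pos + target.feature_weights.neg]
print(sum(weights), target.score, reg.predict([doc])[0])  # all roughly equal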