Esempio n. 1
0
def explain_decision_tree(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        **export_graphviz_kwargs):
    """
    Explain a decision tree classifier through its feature importances
    and a description of the tree itself.

    The result has the following layout (compatible with random forest
    explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )

    ``clf.feature_importances_`` supplies the importances; ``vec`` and
    ``feature_names`` are handed to ``get_feature_names`` to resolve the
    names. ``top`` is forwarded to ``argsort_k_largest`` to select which
    importances are reported. When ``feature_re`` is given, only the
    features kept by ``filtered_by_re`` are reported, while the tree
    information is still built from the unfiltered names.
    ``target_names`` is passed to ``get_tree_info`` as ``class_names``;
    ``targets`` is ignored. Any extra keyword arguments are forwarded to
    ``get_tree_info``, with ``proportion`` defaulting to ``True``.
    """
    all_names = get_feature_names(clf, vec, feature_names=feature_names)
    importances = clf.feature_importances_

    # The tree description always uses the complete set of names; only the
    # importance listing is subject to the regex filter.
    shown_names = all_names
    if feature_re is not None:
        shown_names, kept = all_names.filtered_by_re(feature_re)
        importances = importances[kept]

    top_idx = argsort_k_largest(importances, top)
    top_names = shown_names[top_idx]
    top_values = importances[top_idx]
    # No deviation is computed here: every reported std is zero.
    top_std = np.zeros_like(top_values)

    if "proportion" not in export_graphviz_kwargs:
        export_graphviz_kwargs["proportion"] = True
    tree_description = get_tree_info(
        clf,
        feature_names=all_names,
        class_names=target_names,
        **export_graphviz_kwargs)

    weights = [
        FeatureWeight(name, value, deviation)
        for name, value, deviation in zip(top_names, top_values, top_std)
    ]
    return Explanation(
        feature_importances=weights,
        decision_tree=tree_description,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )
Esempio n. 2
0
def explain_rf_feature_importance(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,  # ignored
        targets=None,  # ignored
        feature_names=None,
        feature_re=None):
    """
    Explain a tree-based ensemble classifier through its feature
    importances.

    The result has the following layout::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )

    The importance of each feature is read from
    ``clf.feature_importances_``; its standard deviation is computed
    across the ``feature_importances_`` of every estimator in
    ``clf.estimators_``. ``vec`` and ``feature_names`` are handed to
    ``get_feature_names`` to resolve the names, ``top`` is forwarded to
    ``argsort_k_largest``, and ``feature_re`` (when given) restricts the
    reported features via ``filtered_by_re``. ``target_names`` and
    ``targets`` are ignored.
    """
    shown_names = get_feature_names(clf, vec, feature_names=feature_names)
    importances = clf.feature_importances_

    # Flatten the estimators container so each member tree is visited once,
    # whatever the dimensionality of ``estimators_``.
    members = np.array(clf.estimators_).ravel()
    per_member = [member.feature_importances_ for member in members]
    spread = np.std(per_member, axis=0)

    if feature_re is not None:
        shown_names, kept = shown_names.filtered_by_re(feature_re)
        importances = importances[kept]
        spread = spread[kept]

    top_idx = argsort_k_largest(importances, top)
    top_names = shown_names[top_idx]
    top_values = importances[top_idx]
    top_std = spread[top_idx]

    weights = [
        FeatureWeight(name, value, deviation)
        for name, value, deviation in zip(top_names, top_values, top_std)
    ]
    return Explanation(
        feature_importances=weights,
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(clf),
        method='feature importances',
    )
Esempio n. 3
0
def get_feature_importances_filtered(
        coef, feature_names, flt_indices, top, coef_std=None):
    # type: (...) -> FeatureImportances
    """
    Build a ``FeatureImportances`` from the top entries of ``coef``.

    When ``flt_indices`` is not None, ``coef`` (and ``coef_std``, if
    supplied) are first restricted to those indices. ``feature_names`` is
    indexed directly with the selected positions and is not filtered here
    -- presumably the caller has already filtered it to match; verify
    against callers. ``top`` is forwarded to ``argsort_k_largest``. The
    ``remaining`` count passed on is the number of (filtered) coefficients
    that were not selected.
    """
    has_std = coef_std is not None
    if flt_indices is not None:
        coef = coef[flt_indices]
        if has_std:
            coef_std = coef_std[flt_indices]

    top_idx = argsort_k_largest(coef, top)
    top_names = feature_names[top_idx]
    top_values = coef[top_idx]
    if has_std:
        top_std = coef_std[top_idx]
    else:
        top_std = None

    not_shown = coef.shape[0] - len(top_idx)
    return FeatureImportances.from_names_values(
        top_names,
        top_values,
        top_std,
        remaining=not_shown,
    )
Esempio n. 4
0
def test_argsort_k_largest_empty():
    """Selections that should pick nothing from ``[0]`` yield an empty array."""
    single_zero = np.array([0])
    nothing = np.array([])

    # Asking for the top 0 elements selects no indices.
    top_zero = single_zero[argsort_k_largest(single_zero, 0)]
    assert _np_eq(top_zero, nothing)

    # The "positive" variant with no limit also selects nothing here
    # (presumably because 0 is not counted as positive).
    positives = single_zero[argsort_k_largest_positive(single_zero, None)]
    assert _np_eq(positives, nothing)
Esempio n. 5
0
def test_argsort_k_largest_None(x):
    """With ``k=None`` one index is returned for every element of ``x``."""
    selected = argsort_k_largest(x, None)
    assert len(selected) == len(x)
Esempio n. 6
0
def test_argsort_k_largest_zero(x):
    """With ``k=0`` no indices are returned, whatever ``x`` holds."""
    selected = argsort_k_largest(x, 0)
    assert len(selected) == 0
Esempio n. 7
0
def test_argsort_k_largest(x, k):
    """``argsort_k_largest`` agrees with a full argsort cut to the top ``k``."""
    # Only consider inputs with at least k elements, all distinct and free
    # of NaN, so the descending order of the top k is unambiguous.
    enough_elements = len(x) >= k
    assume(enough_elements)
    all_distinct = len(set(x)) == len(x)
    assume(all_distinct)
    nan_free = not np.isnan(x).any()
    assume(nan_free)

    # Reference answer: ascending argsort, last k entries, reversed so the
    # largest comes first.
    # NOTE(review): for k == 0 the ``[-k:]`` slice selects the whole array
    # rather than nothing -- confirm the strategy for ``k`` never yields 0.
    expected = np.argsort(x)[-k:][::-1]
    actual = argsort_k_largest(x, k)
    assert (expected == actual).all()