Code example #1
# Imports needed to run this test standalone; the eli5 module paths
# below are assumptions based on the eli5 source layout.
from sklearn.tree import DecisionTreeClassifier

from eli5.sklearn.treeinspect import get_tree_info
from eli5.formatters.trees import tree2text


def test_tree2dict():
    X = [[1, 1], [0, 2], [0, 3], [1, 3], [2, 3], [0, 4]]
    y = [0, 0, 0, 1, 1, 1]
    clf = DecisionTreeClassifier(random_state=42).fit(X, y)
    text = tree2text(get_tree_info(clf))
    print(text)
    expected = """
x1 <= 2.500  (33.3%)  ---> [1.000, 0.000]
x1 > 2.500  (66.7%)
    x0 <= 0.500  (33.3%)
        x1 <= 3.500  (16.7%)  ---> [1.000, 0.000]
        x1 > 3.500  (16.7%)  ---> [0.000, 1.000]
    x0 > 0.500  (33.3%)  ---> [0.000, 1.000]
""".strip()
    assert text == expected

    # check it with feature_names
    text = tree2text(get_tree_info(clf, feature_names=['x', 'y']))
    print(text)
    expected = """
y <= 2.500  (33.3%)  ---> [1.000, 0.000]
y > 2.500  (66.7%)
    x <= 0.500  (33.3%)
        y <= 3.500  (16.7%)  ---> [1.000, 0.000]
        y > 3.500  (16.7%)  ---> [0.000, 1.000]
    x > 0.500  (33.3%)  ---> [0.000, 1.000]
""".strip()
    assert text == expected
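In the rendered tree above, each line shows a split condition, the fraction of training samples that reach the node (in parentheses), and, for leaf nodes, the predicted class-probability vector after the arrow.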
Code example #2
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    coef = estimator.feature_importances_
    # Keep the unfiltered names: the tree rendering below must use the
    # full feature set even when the importance table is filtered.
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]
    # Keep only the ``top`` features with the largest positive importances.
    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    # Report node sample counts as proportions unless the caller overrides it.
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
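For orientation, a minimal usage sketch, reusing the toy data from example #1. It assumes this function is registered as eli5's handler for tree estimators, so the public eli5.explain_weights call reaches it; eli5.format_as_text renders the returned Explanation:

import eli5
from sklearn.tree import DecisionTreeClassifier

X = [[1, 1], [0, 2], [0, 3], [1, 3], [2, 3], [0, 4]]
y = [0, 0, 0, 1, 1, 1]
clf = DecisionTreeClassifier(random_state=42).fit(X, y)

# explain_weights dispatches to the decision tree explainer; the returned
# Explanation bundles the feature importance table and the rendered tree.
expl = eli5.explain_weights(clf, feature_names=['x', 'y'],
                            target_names=['class 0', 'class 1'])
print(eli5.format_as_text(expl))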
Code example #3
File: explain_weights.py Project: suryalistic/eli5
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    feature_importances = get_feature_importances_filtered(
        estimator.feature_importances_, feature_names, flt_indices, top)

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=feature_importances,
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
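Compared with example #2, the only substantive change is that the importance selection has been factored out into get_feature_importances_filtered. A plausible sketch of that helper, reconstructed from the inlined logic in example #2 rather than copied from the eli5 source, so details may differ:

def get_feature_importances_filtered(coef, feature_names, flt_indices, top):
    # Reconstructed from example #2; the real eli5 helper may differ.
    if flt_indices is not None:
        coef = coef[flt_indices]
    # Keep only the ``top`` features with the largest positive importances.
    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    return FeatureImportances(
        [FeatureWeight(*x) for x in zip(names, values)],
        remaining=np.count_nonzero(coef) - len(indices),
    )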
Code example #4
File: explain_weights.py Project: woshahua/eli5
def explain_decision_tree(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree classifier in the
    following format (compatible with random forest explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )

    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    tree_feature_names = feature_names
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values = feature_names[indices], coef[indices]
    std = np.zeros_like(values)
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(clf,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=[
            FeatureWeight(*x) for x in zip(names, values, std)
        ],
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )
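This is an older revision of the same function: the estimator argument is still named clf, only regex filtering via feature_re is supported (there is no feature_filter), argsort_k_largest is used instead of argsort_k_largest_positive, and feature_importances is a plain list of FeatureWeight entries carrying a zero standard deviation placeholder for compatibility with random forest explanations. A minimal sketch of consuming that older format, assuming FeatureWeight is a namedtuple (as the positional construction above suggests) and clf is the fitted tree from example #1:

expl = explain_decision_tree(clf, feature_names=['x', 'y'])
for name, importance, std in expl.feature_importances:
    # std is always zero for a single tree; random forest explanations
    # fill it with the spread of importances across trees.
    print('%s: %.3f (std %.3f)' % (name, importance, std))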