Example #1
def handle_vec(clf, doc, vec, vectorized, feature_names, num_features=None):
    """Resolve the vectorizer and feature names used to explain ``doc``."""
    if not vectorized:
        vec = invert_hashing_and_fit(vec, [doc])
    if (vec is None and feature_names is None and
            pandas_available and isinstance(doc, pd.Series)):
        feature_names = list(doc.index)
    # Explaining predictions does not need coef_scale
    # because it is handled by the vectorizer.
    feature_names = handle_hashing_vec(
        vec, feature_names, coef_scale=None, with_coef_scale=False)
    feature_names = get_feature_names(
        clf, vec, feature_names=feature_names, num_features=num_features)
    return vec, feature_names
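
A hedged usage sketch of the path this helper supports: when no vectorizer or feature names are given and the document is a pandas Series, the Series index becomes the feature names. The data below is made up for illustration; only eli5's public explain_prediction entry point is called.

import pandas as pd
from sklearn.linear_model import LogisticRegression
import eli5

# Toy training data; the column names will double as feature names.
X = pd.DataFrame({'age': [25, 40, 60, 35], 'income': [30, 80, 50, 60]})
y = [0, 1, 1, 0]
clf = LogisticRegression().fit(X, y)

# doc is a pandas Series, so its index ('age', 'income') is used
# as feature_names, as in the branch above.
doc = X.iloc[0]
expl = eli5.explain_prediction(clf, doc)
print(eli5.format_as_text(expl))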
Example #2
def explain_linear_regressor_weights(
    reg,
    vec=None,
    top=_TOP,
    target_names=None,
    targets=None,
    feature_names=None,
    coef_scale=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a linear regressor's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``; you can
    pass it instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take the input feature sign or scale
    into account.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        coef = get_coef(reg, target_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(get_default_target_names(reg),
                                             target_names, targets)
    if is_multitarget_regressor(reg):
        return Explanation(
            targets=[
                TargetExplanation(target=target_name,
                                  feature_weights=_features(target_id))
                for target_id, target_name in display_names
            ],
            description=DESCRIPTION_REGRESSION_MULTITARGET + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
    else:
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[0][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_REGRESSION + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
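
In eli5 this function is normally reached through the public eli5.explain_weights entry point; a minimal sketch with made-up data, assuming scikit-learn and eli5 are installed:

import numpy as np
import eli5
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.randn(100)
reg = Ridge().fit(X, y)

# top=2 keeps only the two strongest weights for the target.
expl = eli5.explain_weights(reg, feature_names=['f0', 'f1', 'f2'], top=2)
print(eli5.format_as_text(expl))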
Example #3
def explain_linear_regressor_weights(reg,
                                     vec=None,
                                     top=_TOP,
                                     target_names=None,
                                     targets=None,
                                     feature_names=None,
                                     coef_scale=None,
                                     feature_re=None):
    """
    Return an explanation of a linear regressor's weights in the following
    format::

        Explanation(
            estimator="<regressor repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<target name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # Number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    To print it, use utilities from eli5.formatters.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        coef = get_coef(reg, target_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(get_default_target_names(reg),
                                      target_names, targets)
    if is_multitarget_regressor(reg):
        return Explanation(
            targets=[
                TargetExplanation(target=target_name,
                                  feature_weights=_features(target_id))
                for target_id, target_name in display_names
            ],
            description=DESCRIPTION_REGRESSION_MULTITARGET + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
    else:
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[0][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_REGRESSION + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
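
The coef_scale convention documented above can be checked in isolation: each coefficient is multiplied by its scale unless the scale is NaN, in which case it is left untouched. A standalone NumPy sketch, with no eli5 internals involved:

import numpy as np

coef = np.array([2.0, -1.0, 0.5])
coef_scale = np.array([10.0, np.nan, 2.0])

# coef[i] = coef[i] * coef_scale[i] if coef_scale[i] is not nan
scaled = np.where(np.isnan(coef_scale), coef, coef * coef_scale)
print(scaled)  # [20. -1.  1.]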
Example #4
def explain_linear_classifier_weights(
    clf,
    vec=None,
    top=_TOP,
    target_names=None,
    targets=None,
    feature_names=None,
    coef_scale=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a linear classifier's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take the input feature sign or scale
    into account.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)

    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(label_id):
        coef = get_coef(clf, label_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(clf.classes_, target_names,
                                             targets)
    if is_multiclass_classifier(clf):
        return Explanation(
            targets=[
                TargetExplanation(target=label,
                                  feature_weights=_features(label_id))
                for label_id, label in display_names
            ],
            description=DESCRIPTION_CLF_MULTICLASS + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
    else:
        # for binary classifiers scikit-learn stores a single coefficient
        # vector, which corresponds to clf.classes_[1].
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[1][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_CLF_BINARY + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
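
A small sketch of the binary convention noted in the else-branch: scikit-learn stores a single coefficient row, associated with clf.classes_[1], so the explanation has one target. The data here is illustrative:

import eli5
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=4, random_state=0)
clf = LogisticRegression().fit(X, y)

print(clf.coef_.shape)  # (1, 4): one row, for the positive class
expl = eli5.explain_weights(clf, target_names=['neg', 'pos'])
print(eli5.format_as_text(expl))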
Example #5
def explain_linear_classifier_weights(clf,
                                      vec=None,
                                      top=_TOP,
                                      target_names=None,
                                      targets=None,
                                      feature_names=None,
                                      coef_scale=None,
                                      feature_re=None):
    """
    Return an explanation of a linear classifier's weights in the following
    format::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<class name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # Number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    To print it, use utilities from eli5.formatters.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)

    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(label_id):
        coef = get_coef(clf, label_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(clf.classes_, target_names, targets)
    if is_multiclass_classifier(clf):
        return Explanation(
            targets=[
                TargetExplanation(target=label,
                                  feature_weights=_features(label_id))
                for label_id, label in display_names
            ],
            description=DESCRIPTION_CLF_MULTICLASS + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
    else:
        # for binary classifiers scikit-learn stores a single coefficient
        # vector, which corresponds to clf.classes_[1].
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[1][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_CLF_BINARY + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
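
To see the feature_re filtering in action, a hedged sketch with a tiny text corpus (the vocabulary and labels are made up, and the public explain_weights entry point is assumed to expose the same feature_re parameter):

import eli5
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

docs = ['good movie', 'bad movie', 'good acting', 'bad acting']
vec = CountVectorizer().fit(docs)
clf = LogisticRegression().fit(vec.transform(docs), [1, 0, 1, 0])

# Only features whose names match the regex are kept; the coefficient
# vector is restricted to the same indices, as in _features() above.
expl = eli5.explain_weights(clf, vec=vec, feature_re='^g')
print(eli5.format_as_text(expl))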