# Imports below are assumed from the surrounding eli5 module; the paths
# follow eli5's sklearn backend layout. Constants such as _TOP,
# HASHING_CAVEATS and the DESCRIPTION_* strings are defined elsewhere
# in the module.
from sklearn.feature_extraction.text import HashingVectorizer

from eli5.base import Explanation, TargetExplanation
from eli5._feature_weights import get_top_features
from eli5.utils import get_target_display_names
from eli5.sklearn.unhashing import (
    InvertableHashingVectorizer,
    handle_hashing_vec,
    is_invhashing,
)
from eli5.sklearn.utils import (
    get_coef,
    get_default_target_names,
    get_feature_names,
    is_multiclass_classifier,
    is_multitarget_regressor,
)


def _handle_vec(clf, doc, vec, vectorized, feature_names):
    if isinstance(vec, HashingVectorizer) and not vectorized:
        vec = InvertableHashingVectorizer(vec)
        vec.fit([doc])
    if is_invhashing(vec) and feature_names is None:
        # Explaining predictions does not need coef_scale,
        # because it is handled by the vectorizer.
        feature_names = vec.get_feature_names(always_signed=False)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    return vec, feature_names
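
# A minimal usage sketch for ``_handle_vec``, assuming a classifier fitted
# on hashed features; ``SGDClassifier`` and the toy documents below are
# illustrative only, not part of this module:
#
#     from sklearn.feature_extraction.text import HashingVectorizer
#     from sklearn.linear_model import SGDClassifier
#
#     vec = HashingVectorizer(n_features=2 ** 10)
#     X = vec.transform(['good movie', 'bad movie'])
#     clf = SGDClassifier(random_state=42).fit(X, [1, 0])
#
#     # With vectorized=False the doc is raw text, so the HashingVectorizer
#     # is wrapped in an InvertableHashingVectorizer fitted on the doc,
#     # which lets (signed) feature names be recovered from hashes.
#     vec_, feature_names = _handle_vec(
#         clf, 'good movie', vec, vectorized=False, feature_names=None)
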
def explain_linear_regressor_weights(
        reg,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,
        feature_names=None,
        coef_scale=None,
        feature_re=None,
        feature_filter=None,
        ):
    """
    Return an explanation of a linear regressor's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``; you can
    pass it instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take input feature sign or scale
    into account.
    """
    feature_names, coef_scale = handle_hashing_vec(
        vec, feature_names, coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        coef = get_coef(reg, target_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets)
    if is_multitarget_regressor(reg):
        return Explanation(
            targets=[
                TargetExplanation(
                    target=target_name,
                    feature_weights=_features(target_id),
                )
                for target_id, target_name in display_names
            ],
            description=DESCRIPTION_REGRESSION_MULTITARGET + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
    else:
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[0][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_REGRESSION + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
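
# A minimal sketch of calling explain_linear_regressor_weights; ``Ridge``
# and the toy data are illustrative, not part of this module:
#
#     from sklearn.linear_model import Ridge
#
#     reg = Ridge().fit([[0., 1.], [1., 0.], [1., 1.]], [1., 2., 3.])
#     expl = explain_linear_regressor_weights(
#         reg, feature_names=['f0', 'f1'], top=2)
#     # expl.targets[0].feature_weights holds the top positive and
#     # negative coefficients; render it with eli5.formatters.
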
def explain_linear_classifier_weights(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,
        feature_names=None,
        coef_scale=None,
        feature_re=None,
        feature_filter=None,
        ):
    """
    Return an explanation of a linear classifier's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take input feature sign or scale
    into account.
    """
    feature_names, coef_scale = handle_hashing_vec(
        vec, feature_names, coef_scale)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(label_id):
        coef = get_coef(clf, label_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(
        clf.classes_, target_names, targets)
    if is_multiclass_classifier(clf):
        return Explanation(
            targets=[
                TargetExplanation(
                    target=label,
                    feature_weights=_features(label_id),
                )
                for label_id, label in display_names
            ],
            description=DESCRIPTION_CLF_MULTICLASS + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
    else:
        # For binary classifiers scikit-learn stores a single coefficient
        # vector, which corresponds to clf.classes_[1].
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[1][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_CLF_BINARY + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
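
# A minimal sketch of calling explain_linear_classifier_weights with a
# vectorizer; ``LogisticRegression`` and the sample corpus are illustrative:
#
#     from sklearn.feature_extraction.text import CountVectorizer
#     from sklearn.linear_model import LogisticRegression
#
#     vec = CountVectorizer()
#     X = vec.fit_transform(['good movie', 'bad movie'])
#     clf = LogisticRegression().fit(X, ['pos', 'neg'])
#
#     # Binary case: the single coef_ row corresponds to clf.classes_[1]
#     # ('pos' here, since classes_ is sorted), so weights are reported
#     # for that class.
#     expl = explain_linear_classifier_weights(clf, vec=vec, top=5)
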