def test_get_default_target_names():
    """Single-output regressors are named 'y'; multi-output ones 'y0', 'y1', ..."""
    single = SGDRegressor(**SGD_KWARGS)
    X, y = make_regression(n_targets=1, n_features=3)
    single.fit(X, y)
    assert set(get_default_target_names(single)) == {'y'}

    multi = ElasticNet()
    X, y = make_regression(n_targets=2, n_features=3)
    multi.fit(X, y)
    assert set(get_default_target_names(multi)) == {'y0', 'y1'}
def explain_prediction_linear_regressor(reg, doc, vec=None, top=None,
                                        target_names=None, targets=None,
                                        feature_names=None, vectorized=False):
    """ Explain prediction of a linear regressor. """
    vec, feature_names = _handle_vec(reg, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)
    score, = reg.predict(X)
    if has_intercept(reg):
        X = _add_intercept(X)
    x, = X

    def _target_weights(label_id):
        # Per-feature contribution: elementwise product of input and coefficients.
        coef = get_coef(reg, label_id)
        return get_top_features(feature_names, _multiply(x, coef), top)

    display_names = get_display_names(
        get_default_target_names(reg), target_names, targets)
    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    # Unify single- and multi-target handling into one list of
    # (label_id, label, score) triples.
    if is_multitarget_regressor(reg):
        entries = [(label_id, label, score[label_id])
                   for label_id, label in display_names]
    else:
        entries = [(0, display_names[0][1], score)]
    for label_id, label, target_score in entries:
        target_expl = TargetExplanation(
            target=label,
            feature_weights=_target_weights(label_id),
            score=target_score,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)
    return res
def explain_prediction_tree_regressor(reg, doc, vec=None, top=None,
                                      top_targets=None, target_names=None,
                                      targets=None, feature_names=None,
                                      feature_re=None, feature_filter=None,
                                      vectorized=False):
    """ Explain prediction of a tree regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble (or a single tree for DecisionTreeRegressor).
    Each node of the tree has an output score, and contribution of a feature
    on the decision path is how much the score changes from parent to child.
    Weights of all features sum to the output score of the estimator.
    """
    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Trees have no intercept, but the baseline contribution is exposed
        # here as a synthetic bias feature.
        feature_names.bias_name = '<BIAS>'

    score, = reg.predict(X)
    num_targets = getattr(reg, 'n_outputs_', 1)
    is_multitarget = num_targets > 1
    feature_weights = _trees_feature_weights(reg, X, feature_names, num_targets)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def _weights(label_id, scale=1.0):
        return get_top_features_filtered(
            x, flt_feature_names, flt_indices,
            feature_weights[:, label_id], top, scale)

    res = Explanation(
        estimator=repr(reg),
        method='decision path',
        description=(DESCRIPTION_TREE_REG_MULTITARGET if is_multitarget
                     else DESCRIPTION_TREE_REG),
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    display_names = get_target_display_names(
        get_default_target_names(reg, num_targets=num_targets),
        target_names, targets, top_targets, score)
    # Unify single- and multi-target handling into one list of
    # (label_id, label, score) triples.
    if is_multitarget:
        entries = [(label_id, label, score[label_id])
                   for label_id, label in display_names]
    else:
        entries = [(0, display_names[0][1], score)]
    for label_id, label, target_score in entries:
        target_expl = TargetExplanation(
            target=label,
            feature_weights=_weights(label_id),
            score=target_score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)
    return res
def explain_prediction_linear_regressor(reg, doc, vec=None, top=None,
                                        top_targets=None, target_names=None,
                                        targets=None, feature_names=None,
                                        feature_re=None, feature_filter=None,
                                        vectorized=False):
    """ Explain prediction of a linear regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``;
    you can pass it instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor ``reg``. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.
    """
    if isinstance(reg, (SVR, NuSVR)) and reg.kernel != 'linear':
        # Only linear kernels expose meaningful per-feature coefficients.
        return explain_prediction_sklearn_not_supported(reg, doc)

    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)
    score, = reg.predict(X)
    if has_intercept(reg):
        X = add_intercept(X)
    x = get_X0(X)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    _weights = _linear_weights(reg, x, top, feature_names, flt_indices)
    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets,
        top_targets, score)
    # Unify single- and multi-target handling into one list of
    # (label_id, label, score) triples.
    if is_multitarget_regressor(reg):
        entries = [(label_id, label, score[label_id])
                   for label_id, label in display_names]
    else:
        entries = [(0, display_names[0][1], score)]
    for label_id, label, target_score in entries:
        target_expl = TargetExplanation(
            target=label,
            feature_weights=_weights(label_id),
            score=target_score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)
    return res
def explain_linear_regressor_weights(
        reg,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,
        feature_names=None,
        coef_scale=None,
        feature_re=None,
        feature_filter=None,
        ):
    """
    Return an explanation of a linear regressor weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``; you can
    pass it instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take input feature sign or scale in account.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        # Coefficients for one target, optionally rescaled and filtered.
        coef = get_coef(reg, target_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets)
    if is_multitarget_regressor(reg):
        target_expls = [
            TargetExplanation(target=name, feature_weights=_features(tid))
            for tid, name in display_names
        ]
        description = DESCRIPTION_REGRESSION_MULTITARGET + caveats
    else:
        target_expls = [
            TargetExplanation(
                target=display_names[0][1],
                feature_weights=_features(0),
            )
        ]
        description = DESCRIPTION_REGRESSION + caveats
    return Explanation(
        targets=target_expls,
        description=description,
        estimator=repr(reg),
        method='linear model',
        is_regression=True,
    )
def explain_linear_regressor_weights(reg, vec=None, top=_TOP,
                                     target_names=None, targets=None,
                                     feature_names=None, coef_scale=None,
                                     feature_re=None):
    """
    Return an explanation of a linear regressor weights in the following
    format::

        Explanation(
            estimator="<regressor repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<target name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # A number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    To print it use utilities from eli5.formatters.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    flt_indices = None
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
    caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        # Coefficients for one target, optionally rescaled and filtered.
        coef = get_coef(reg, target_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(
        get_default_target_names(reg), target_names, targets)
    if is_multitarget_regressor(reg):
        target_expls = [
            TargetExplanation(target=name, feature_weights=_features(tid))
            for tid, name in display_names
        ]
        description = DESCRIPTION_REGRESSION_MULTITARGET + caveats
    else:
        target_expls = [
            TargetExplanation(
                target=display_names[0][1],
                feature_weights=_features(0),
            )
        ]
        description = DESCRIPTION_REGRESSION + caveats
    return Explanation(
        targets=target_expls,
        description=description,
        estimator=repr(reg),
        method='linear model',
        is_regression=True,
    )