Exemple #1
0
def test_explain_prediction_clf_multitarget(
        newsgroups_train, filter_missing, use_booster):
    """Explain a multiclass XGBoost prediction for a single text document.

    Depending on ``use_booster`` the model is either a raw booster built
    with ``xgboost.train`` or a fitted ``XGBClassifier``.  When
    ``filter_missing`` is set, a feature filter that rejects NaN values is
    passed to ``explain_prediction`` and the score check is skipped.
    """
    docs, ys, target_names = newsgroups_train
    vec = CountVectorizer(stop_words='english')
    xs = vec.fit_transform(docs)

    if not use_booster:
        clf = XGBClassifier(n_estimators=100, max_depth=2)
        clf.fit(xs, ys)
    else:
        booster_params = {
            'objective': 'multi:softprob',
            'num_class': len(target_names),
            'silent': True,
            'max_depth': 2,
        }
        train_matrix = xgboost.DMatrix(xs, label=ys, missing=np.nan)
        clf = xgboost.train(
            params=booster_params,
            dtrain=train_matrix,
            num_boost_round=100,
        )

    if filter_missing:
        # Keep only features whose value is not NaN.
        def feature_filter(_, v):
            return not np.isnan(v)
    else:
        feature_filter = None

    doc = 'computer graphics in space: a new religion'
    res = explain_prediction(
        clf, doc,
        vec=vec,
        target_names=target_names,
        feature_filter=feature_filter,
    )
    format_as_all(res, clf)
    if not filter_missing:
        check_targets_scores(res)

    # Words from the document must push their matching targets up.
    graphics_pos = get_all_features(res.targets[1].feature_weights.pos)
    assert 'computer' in graphics_pos
    religion_pos = get_all_features(res.targets[3].feature_weights.pos)
    assert 'religion' in religion_pos

    # top_targets=2 must keep exactly the two most probable targets.
    top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
    assert len(top_target_res.targets) == 2
    top_probas = sorted(t.proba for t in top_target_res.targets)
    all_probas = sorted(t.proba for t in res.targets)
    assert top_probas == all_probas[-2:]
Exemple #2
0
def test_explain_prediction_clf_multitarget(newsgroups_train, filter_missing):
    """Explain a multiclass ``XGBClassifier`` prediction for a text document.

    When ``filter_missing`` is set, a feature filter that rejects NaN values
    is passed to ``explain_prediction`` and the score check is skipped.
    """
    docs, ys, target_names = newsgroups_train
    vec = CountVectorizer(stop_words='english')
    xs = vec.fit_transform(docs)
    clf = XGBClassifier(n_estimators=100, max_depth=2)
    clf.fit(xs, ys)

    if filter_missing:
        # Keep only features whose value is not NaN.
        def feature_filter(_, v):
            return not np.isnan(v)
    else:
        feature_filter = None

    doc = 'computer graphics in space: a new religion'
    res = explain_prediction(
        clf, doc,
        vec=vec,
        target_names=target_names,
        feature_filter=feature_filter,
    )
    format_as_all(res, clf)
    if not filter_missing:
        check_targets_scores(res)

    # Words from the document must push their matching targets up.
    graphics_pos = get_all_features(res.targets[1].feature_weights.pos)
    assert 'computer' in graphics_pos
    religion_pos = get_all_features(res.targets[3].feature_weights.pos)
    assert 'religion' in religion_pos

    # top_targets=2 must keep exactly the two most probable targets.
    top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
    assert len(top_target_res.targets) == 2
    top_probas = sorted(t.proba for t in top_target_res.targets)
    all_probas = sorted(t.proba for t in res.targets)
    assert top_probas == all_probas[-2:]
Exemple #3
0
def test_explain_prediction_clf_xor():
    """Explain predictions of a classifier trained on two noisy binary inputs.

    The label of each sample is whether its two underlying bits are equal;
    the features are those bits with Gaussian noise (sigma 0.2) added.
    """
    true_xs = []
    for _ in range(100):
        true_xs.append([np.random.randint(2), np.random.randint(2)])

    noisy_rows = []
    for x, y in true_xs:
        noisy_rows.append([np.random.normal(x, 0.2), np.random.normal(y, 0.2)])
    xs = np.array(noisy_rows)
    ys = np.array([x == y for x, y in true_xs])

    clf = XGBClassifier(n_estimators=100, max_depth=2)
    clf.fit(xs, ys)

    res = explain_prediction(clf, np.array([1, 1]))
    format_as_all(res, clf)

    # Check every corner of the unit square.
    corners = [[0, 1], [1, 0], [0, 0], [1, 1]]
    for corner in corners:
        res = explain_prediction(clf, np.array(corner))
        print(corner)
        print(format_as_text(res, show=fields.WEIGHTS))
        check_targets_scores(res)
Exemple #4
0
def explain_prediction_me(x, model, feature_name_list):
    """Print an eli5 explanation of ``model``'s prediction for ``x``.

    For every target in the explanation this prints the target, its
    probability and score, followed by the positive and the negative
    feature weights (feature, weight and actual value).

    :param x: the sample to explain, passed through to eli5.
    :param model: object exposing ``get_booster()``; the returned booster
        is what gets explained.
    :param feature_name_list: feature names forwarded to eli5 as
        ``feature_names``.
    """
    from eli5.explain import explain_prediction

    expl = explain_prediction(
        model.get_booster(), x, feature_names=feature_name_list)

    for target_explanation in expl.targets:
        # Single-argument print(...) produces the same output on
        # Python 2 and Python 3.
        print("class {} probability: {} score: {}".format(
            target_explanation.target,
            target_explanation.proba,
            target_explanation.score))
        weights = target_explanation.feature_weights
        _print_feature_weights("Positive:", weights.pos)
        _print_feature_weights("Negative:", weights.neg)


def _print_feature_weights(heading, feature_weights):
    """Print ``heading``, then one line per entry of ``feature_weights``."""
    print(heading)
    for feature_weight in feature_weights:
        print("{}: weight: {} actual value: {}".format(
            feature_weight.feature,
            feature_weight.weight,
            feature_weight.value))
Exemple #5
0
def test_explain_prediction_clf_binary(newsgroups_train_binary_big, missing):
    """Explain a binary ``XGBClassifier`` prediction for a text document.

    Covers the unfiltered explanation, filtering by feature name regexp
    (``feature_re``) and filtering by feature value (``feature_filter``).
    """
    docs, ys, target_names = newsgroups_train_binary_big
    vec = CountVectorizer(stop_words='english')
    clf = XGBClassifier(n_estimators=100, max_depth=2, missing=missing)
    xs = vec.fit_transform(docs)
    clf.fit(xs, ys)

    def get_res(**kwargs):
        # Explain the same document every time; only the options vary.
        return explain_prediction(
            clf,
            'computer graphics in space: a sign of atheism',
            vec=vec,
            target_names=target_names,
            **kwargs)

    res = get_res()
    for expl in format_as_all(res, clf, show_feature_values=True):
        assert 'graphics' in expl
        assert 'Missing' in expl
    check_targets_scores(res)

    first_target_weights = res.targets[0].feature_weights
    pos_features = get_all_features(first_target_weights.pos)
    neg_features = get_all_features(first_target_weights.neg)
    assert 'graphics' in pos_features
    assert 'computer' in pos_features
    assert 'atheism' in neg_features

    # Restricting features by regexp keeps 'graphics' and drops 'computer'.
    flt_res = get_res(feature_re='gra')
    flt_pos_features = get_all_features(flt_res.targets[0].feature_weights.pos)
    assert 'graphics' in flt_pos_features
    assert 'computer' not in flt_pos_features

    # Rejecting NaN feature values removes 'Missing' from the output.
    def value_is_present(_, v):
        return not np.isnan(v)

    flt_value_res = get_res(feature_filter=value_is_present)
    for expl in format_as_all(flt_value_res, clf, show_feature_values=True):
        assert 'Missing' not in expl
Exemple #6
0
def test_explain_prediction_clf_interval():
    """Explain predictions where the label depends on one feature only.

    The label is whether the first underlying integer equals 1; both
    features are the underlying integers with Gaussian noise (sigma 0.2).
    """
    true_xs = []
    for _ in range(1000):
        true_xs.append([np.random.randint(3), np.random.randint(10)])

    noisy_rows = []
    for x, y in true_xs:
        noisy_rows.append([np.random.normal(x, 0.2), np.random.normal(y, 0.2)])
    xs = np.array(noisy_rows)
    ys = np.array([x == 1 for x, _ in true_xs])

    clf = XGBClassifier(n_estimators=100, max_depth=2)
    clf.fit(xs, ys)

    # Feature name and feature value must both show up in the output.
    res = explain_prediction(clf, np.array([1.23, 1.45]))
    for expl in format_as_all(res, clf, show_feature_values=True):
        assert 'x0' in expl
        assert '1.23' in expl

    probes = [[0, 1], [1, 1], [2, 1], [0.8, 5], [1.2, 5]]
    for probe in probes:
        res = explain_prediction(clf, np.array(probe))
        print(probe)
        print(format_as_text(res, show=fields.WEIGHTS))
        check_targets_scores(res)
Exemple #7
0
def test_dense_missing():
    """Explain an ``XGBRegressor`` fitted on dense data with ``missing=0``.

    Checks that 'Missing' appears in the rendered explanation, and that
    both it and 'x1' disappear when NaN-valued features are filtered out.
    """
    base_rows = [[0, 1], [0, 2], [1, 2], [1, 0], [0.1, 0.1]]
    base_targets = [0, 0, 3, 2, 0.2]
    xs = np.array(base_rows * 10)
    ys = np.array(base_targets * 10)

    # set too high n_estimators to check empty trees too
    reg = XGBRegressor(n_estimators=100, max_depth=2, missing=0)
    reg.fit(xs, ys)

    sample = [2, 0]
    res = explain_prediction(reg, np.array(sample))
    check_targets_scores(res)
    for expl in format_as_all(res, reg, show_feature_values=True):
        assert 'x0' in expl
        assert 'x1' in expl
        assert 'Missing' in expl

    def value_is_present(_, v):
        return not np.isnan(v)

    flt_res = explain_prediction(
        reg, np.array(sample), feature_filter=value_is_present)
    for expl in format_as_all(flt_res, reg, show_feature_values=True):
        assert 'x1' not in expl
        assert 'Missing' not in expl
Exemple #8
0
 def explain_prediction(self, x, column_id, feature_names):
     """Return a text explanation of the prediction for ``x``.

     The model is looked up as ``self.model[column_id]``; the explanation
     is limited with ``top=5`` and rendered with feature values shown.
     """
     from eli5.explain import explain_prediction
     from eli5.formatters import format_as_text

     explanation = explain_prediction(
         self.model[column_id], x, feature_names=feature_names, top=5)
     return format_as_text(explanation, show_feature_values=True)
 def explain_prediction(self, x, model):
     """Return a text explanation of ``model``'s prediction for ``x``.

     Feature names come from ``self.feature_name_list``; the explanation
     is limited with ``top=5`` and rendered with feature values shown.
     """
     from eli5.explain import explain_prediction
     from eli5.formatters import format_as_text

     explanation = explain_prediction(
         model, x, feature_names=self.feature_name_list, top=5)
     return format_as_text(explanation, show_feature_values=True)
Exemple #10
0
def test_explain_prediction_pandas_dot_in_feature_name(boston_train):
    """Explain a regressor fitted on a DataFrame whose columns contain dots.

    Every column is renamed to ``<name>.<index>``, and one of the renamed
    columns must appear in each rendered explanation.  Skipped when pandas
    is not installed.
    """
    pd = pytest.importorskip('pandas')
    X, y, feature_names = boston_train

    dotted_names = []
    for idx, name in enumerate(feature_names):
        dotted_names.append("{}.{}".format(name, idx))
    feature_names = dotted_names
    df = pd.DataFrame(X, columns=feature_names)

    reg = XGBRegressor()
    reg.fit(df, y)

    first_row = df.iloc[0]
    res = explain_prediction(reg, first_row)
    for expl in format_as_all(res, reg):
        assert 'PTRATIO.1' in expl
Exemple #11
0
def test_explain_prediction_feature_union_sparse(newsgroups_train_binary):
    """Explain a classifier fed by a ``FeatureUnion`` of two vectorizers."""
    # FeatureUnion with sparse features and text highlighting
    docs, ys, target_names = newsgroups_train_binary
    word_vec = CountVectorizer(stop_words='english')
    char_vec = CountVectorizer(ngram_range=(3, 3))
    vec = FeatureUnion([('word', word_vec), ('char', char_vec)])

    clf = XGBClassifier(n_estimators=100, max_depth=2, missing=0)
    xs = vec.fit_transform(docs)
    clf.fit(xs, ys)

    doc = 'computer graphics in space: a sign of atheism'
    res = explain_prediction(clf, doc, vec=vec, target_names=target_names)
    format_as_all(res, clf)
    check_targets_scores(res)

    first_target = res.targets[0]
    pos_features = get_all_features(first_target.feature_weights.pos)
    # Feature names carry the 'word__' prefix of their union member.
    assert 'word__graphics' in pos_features
    assert first_target.weighted_spans
Exemple #12
0
def test_explain_prediction_clf_binary(
        newsgroups_train_binary_big, missing, use_booster):
    """Explain a binary XGBoost prediction for a single text document.

    Depending on ``use_booster`` the model is either a raw booster built
    with ``xgboost.train`` or a fitted ``XGBClassifier``.  For the booster,
    ``missing`` and ``is_regression=False`` are passed explicitly to every
    ``explain_prediction`` call.
    """
    docs, ys, target_names = newsgroups_train_binary_big
    vec = CountVectorizer(stop_words='english')
    xs = vec.fit_transform(docs)

    explain_kwargs = {}
    if not use_booster:
        clf = XGBClassifier(n_estimators=100, max_depth=2, missing=missing)
        clf.fit(xs, ys)
    else:
        booster_params = {
            'objective': 'binary:logistic',
            'silent': True,
            'max_depth': 2,
        }
        train_matrix = xgboost.DMatrix(xs, label=ys, missing=missing)
        clf = xgboost.train(
            params=booster_params,
            dtrain=train_matrix,
            num_boost_round=100,
        )
        explain_kwargs['missing'] = missing
        explain_kwargs['is_regression'] = False

    def get_res(**kwargs):
        # Model-specific options take precedence over per-call options.
        merged = dict(kwargs)
        merged.update(explain_kwargs)
        return explain_prediction(
            clf, 'computer graphics in space: a sign of atheism',
            vec=vec, target_names=target_names, **merged)

    res = get_res()
    for expl in format_as_all(res, clf, show_feature_values=True):
        assert 'graphics' in expl
        assert 'Missing' in expl
    check_targets_scores(res)

    first_target_weights = res.targets[0].feature_weights
    pos_features = get_all_features(first_target_weights.pos)
    neg_features = get_all_features(first_target_weights.neg)
    assert 'graphics' in pos_features
    assert 'computer' in pos_features
    assert 'atheism' in neg_features

    # Restricting features by regexp keeps 'graphics' and drops 'computer'.
    flt_res = get_res(feature_re='gra')
    flt_pos_features = get_all_features(flt_res.targets[0].feature_weights.pos)
    assert 'graphics' in flt_pos_features
    assert 'computer' not in flt_pos_features

    # Rejecting NaN feature values removes 'Missing' from the output.
    def value_is_present(_, v):
        return not np.isnan(v)

    flt_value_res = get_res(feature_filter=value_is_present)
    for expl in format_as_all(flt_value_res, clf, show_feature_values=True):
        assert 'Missing' not in expl
Exemple #13
0
def test_explain_prediction_feature_union_dense():
    """Explain a regressor fed by a ``FeatureUnion`` producing dense columns.

    Each union member extracts one key from a dict sample, using NaN when
    the key is absent.  The 'x' key is removed from the first 50 samples
    and the 'y' key from the last 50.
    """
    # Test FeatureUnion handling and missing features in dense matrix
    def transformer(key):
        def extract_column(xs):
            return np.array([[x.get(key, np.nan)] for x in xs])
        return FunctionTransformer(extract_column, validate=False)

    vec = FeatureUnion([('x', transformer('x')), ('y', transformer('y'))])

    gauss = np.random.normal
    data = []
    for _ in range(200):
        data.append((gauss(1), 2 + 10 * gauss(1)))
    ys = [-3 * x + y for x, y in data]

    xs = []
    for x, y in data:
        xs.append({'x': gauss(x), 'y': gauss(y)})
    for sample in xs[:50]:
        del sample['x']
    for sample in xs[-50:]:
        del sample['y']

    reg = XGBRegressor()
    reg.fit(vec.transform(xs), ys)

    # xs[0] has no 'x' key, so its explanation must mention 'Missing'.
    res = explain_prediction(reg, xs[0], vec=vec, feature_names=['_x_', '_y_'])
    check_targets_scores(res)
    for expl in format_as_all(res, reg, show_feature_values=True):
        assert 'Missing' in expl
        assert '_y_' in expl
        assert '_x_' in expl