Example #1
def kshap_explainer(lr_classifier, adult_data):
    predictor = predict_fcn(predict_type='proba',
                            clf=lr_classifier,
                            preproc=adult_data['preprocessor'])
    kshap = KernelShap(predictor=predictor,
                       link='logit',
                       feature_names=adult_data['metadata']['feature_names'])
    kshap.fit(adult_data['X_train'][:100])
    return kshap
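
Every example on this page builds its predictor through a predict_fcn helper whose definition is not shown. A minimal sketch of what such a helper might look like, assuming scikit-learn style estimators exposing predict / predict_proba and an optional fitted preprocessor with a transform method (the signature and behaviour below are assumptions, not the library's own implementation):

def predict_fcn(predict_type, clf, preproc=None):
    # Hypothetical sketch of the helper used throughout these examples;
    # it assumes a scikit-learn style classifier and an optional preprocessor.
    if predict_type == 'proba':
        base_fn = clf.predict_proba  # class probabilities, shape (n, n_classes)
    elif predict_type == 'class':
        base_fn = clf.predict        # hard class labels, shape (n,)
    else:
        raise ValueError(f"unknown predict_type: {predict_type}")

    if preproc is None:
        return base_fn
    # apply the fitted preprocessor (e.g. a ColumnTransformer) before predicting
    return lambda x: base_fn(preproc.transform(x))
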
Example #2
def atab_explainer(lr_classifier, adult_data):
    predictor = predict_fcn(predict_type='class',
                            clf=lr_classifier,
                            preproc=adult_data['preprocessor'])
    atab = AnchorTabular(
        predictor=predictor,
        feature_names=adult_data['metadata']['feature_names'],
        categorical_names=adult_data['metadata']['category_map'])
    atab.fit(adult_data['X_train'], disc_perc=(25, 50, 75))
    return atab
Example #3
def at_iris_explainer(get_iris_dataset, rf_classifier, request):
    """
    Instantiates and fits an AnchorTabular explainer for the Iris dataset.
    """

    predict_type = request.param
    data = get_iris_dataset
    clf, _ = rf_classifier  # preprocessor not necessary

    # instantiate and fit explainer
    pred_fn = predict_fcn(predict_type, clf)
    explainer = AnchorTabular(pred_fn, data['metadata']['feature_names'])
    explainer.fit(data['X_train'], disc_perc=(25, 50, 75))

    return data['X_test'], explainer, pred_fn, predict_type
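
The fixture returns the test data, the fitted explainer, the prediction function and the predict type. A minimal sketch of a test consuming it (this test body is an illustration under the assumptions above, not taken from the suite):

def test_anchor_tabular_iris(at_iris_explainer):
    # hypothetical consumer of the at_iris_explainer fixture, for illustration only
    X_test, explainer, pred_fn, predict_type = at_iris_explainer

    # explain a single test instance and check the anchor meets the precision threshold
    explanation = explainer.explain(X_test[0], threshold=0.95)
    assert explanation.precision >= 0.95
    assert isinstance(explanation.anchor, list)
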
Example #4
def test_save_KernelShap(kshap_explainer, lr_classifier, adult_data):
    predictor = predict_fcn(predict_type='proba',
                            clf=lr_classifier,
                            preproc=adult_data['preprocessor'])
    X = adult_data['X_test'][:2]

    exp0 = kshap_explainer.explain(X)

    with tempfile.TemporaryDirectory() as temp_dir:
        kshap_explainer.save(temp_dir)
        kshap_explainer1 = load_explainer(temp_dir, predictor=predictor)

        assert isinstance(kshap_explainer1, KernelShap)
        assert kshap_explainer.meta == kshap_explainer1.meta

        exp1 = kshap_explainer1.explain(X)
        assert exp0.meta == exp1.meta
Example #5
def test_save_AnchorTabular(atab_explainer, lr_classifier, adult_data):
    predictor = predict_fcn(predict_type='class',
                            clf=lr_classifier,
                            preproc=adult_data['preprocessor'])
    X = adult_data['X_test'][0]

    exp0 = atab_explainer.explain(X)

    with tempfile.TemporaryDirectory() as temp_dir:
        atab_explainer.save(temp_dir)
        atab_explainer1 = load_explainer(temp_dir, predictor=predictor)

        assert isinstance(atab_explainer1, AnchorTabular)
        assert atab_explainer.meta == atab_explainer1.meta

        exp1 = atab_explainer1.explain(X)
        assert exp0.meta == exp1.meta
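
The save/load tests above, like the remaining examples, rely on imports that are not shown. A plausible set, assuming everything comes from the alibi library (the module paths are an assumption and may differ between versions):

import tempfile

import numpy as np

from alibi.explainers import (AnchorTabular, AnchorText,
                              DistributedAnchorTabular, KernelShap)
from alibi.saving import load_explainer
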
Example #6
def at_adult_explainer(get_adult_dataset, rf_classifier, request):
    """
    Instantiates and fits an AnchorTabular explainer for the Adult dataset.
    """

    # retrieve the test parametrization, dataset and fitted random forest classifier
    predict_type = request.param
    data = get_adult_dataset
    clf, preprocessor = rf_classifier

    # instantiate and fit explainer
    pred_fn = predict_fcn(predict_type, clf, preprocessor)
    explainer = AnchorTabular(
        pred_fn,
        data['metadata']['feature_names'],
        categorical_names=data['metadata']['category_map'])
    explainer.fit(data['X_train'], disc_perc=(25, 50, 75))

    return data['X_test'], explainer, pred_fn, predict_type
Example #7
def test_anchor_text(lr_classifier, text, n_punctuation_marks, n_unique_words,
                     predict_type, anchor, use_similarity_proba, use_unk,
                     threshold):
    # test parameters
    num_samples = 100
    sample_proba = .5
    top_n = 500
    temperature = 1.
    n_covered_ex = 5  # number of covered examples (where the anchor applies) to return

    # fit and initialise predictor
    clf, preprocessor = lr_classifier
    predictor = predict_fcn(predict_type, clf, preproc=preprocessor)

    # test explainer initialization
    explainer = AnchorText(nlp, predictor)
    assert explainer.predictor(['book']).shape == (1, )

    # setup explainer
    perturb_opts = {
        'use_similarity_proba': use_similarity_proba,
        'sample_proba': sample_proba,
        'temperature': temperature,
    }
    explainer.n_covered_ex = n_covered_ex
    explainer.set_words_and_pos(text)
    explainer.set_sampler_perturbation(use_unk, perturb_opts, top_n)
    explainer.set_data_type(use_unk)
    if predict_type == 'proba':
        label = np.argmax(predictor([text])[0])
    elif predict_type == 'class':
        label = predictor([text])[0]
    explainer.instance_label = label

    assert isinstance(explainer.dtype, str)
    assert len(explainer.punctuation) == n_punctuation_marks
    assert len(explainer.words) == len(explainer.positions)

    # test sampler
    cov_true, cov_false, labels, data, coverage, _ = explainer.sampler(
        (0, anchor), num_samples)
    if not anchor:
        assert coverage == -1
    # check that the anchor words are kept in every sampled instance
    if use_similarity_proba and len(anchor) > 0:
        assert len(anchor) * data.shape[0] == data[:, anchor].sum()

    if use_unk:
        # get list of unique words
        all_words = explainer.words
        # unique words = words in text + UNK
        assert len(np.unique(all_words)) == n_unique_words

    # test explanation
    explanation = explainer.explain(
        text,
        use_unk=use_unk,
        threshold=threshold,
        use_similarity_proba=use_similarity_proba,
    )
    assert explanation.precision >= threshold
    assert explanation.raw['prediction'].item() == label
    assert explanation.meta.keys() == DEFAULT_META_ANCHOR.keys()
    assert explanation.data.keys() == DEFAULT_DATA_ANCHOR.keys()

    # check if sampled sentences are not cut short
    keys = ['covered_true', 'covered_false']
    for i in range(len(explanation.raw['feature'])):
        example_dict = explanation.raw['examples'][i]
        for k in keys:
            for example in example_dict[k]:
                # check that we have perturbed the sentences
                if use_unk:
                    assert 'UNK' in example or example.replace(' ', '') == text.replace(' ', '')
                else:
                    assert 'UNK' not in example
                assert example[-1] in ['.', 'K']
Example #8
def test_distributed_anchor_tabular(
    ncpu,
    predict_type,
    at_defaults,
    iris_data,
    rf_classifier,
    test_instance_idx,
):
    if RAY_INSTALLED:
        import ray

        # inputs
        params = at_defaults
        threshold = params['desired_confidence']
        n_covered_ex = params['n_covered_ex']  # number of covered examples to return when anchor applies
        batch_size = params['batch_size']  # number of samples to draw during sampling
        n_anchors_to_sample = 6  # for testing sampling function

        # prepare the classifier and explainer
        data = iris_data
        X_test, X_train = data['X_test'], data['X_train']
        feature_names = data['metadata']['feature_names']
        clf, preprocessor = rf_classifier
        predictor = predict_fcn(predict_type, clf)
        explainer = DistributedAnchorTabular(predictor, feature_names, seed=0)
        explainer.fit(X_train, ncpu=ncpu)

        # select instance to be explained
        instance = X_test[test_instance_idx]
        if predict_type == 'proba':
            instance_label = np.argmax(predictor(instance.reshape(1, -1)),
                                       axis=1)
        else:
            instance_label = predictor(instance.reshape(1, -1))[0]

        # explain the instance and do basic checks on the lookups and instance labels used by samplers
        explanation = explainer.explain(instance,
                                        threshold=threshold,
                                        n_covered_ex=n_covered_ex)
        assert len(explainer.samplers) == ncpu
        actors = explainer.samplers
        for actor in actors:
            sampler = ray.get(actor._get_sampler.remote())
            ord_feats = sampler.ord_lookup.keys()
            cat_feats = sampler.cat_lookup.keys()
            enc_feats = sampler.enc2feat_idx.keys()
            assert (set(ord_feats) | set(cat_feats)) == set(enc_feats)
            assert sampler.instance_label == instance_label
            assert sampler.n_covered_ex == n_covered_ex

        # check explanation
        assert explainer.instance_label == instance_label
        assert explanation.precision >= threshold
        assert explanation.coverage >= 0.05

        distrib_anchor_beam = explainer.mab
        assert len(distrib_anchor_beam.samplers) == ncpu

        # basic checks for DistributedAnchorBaseBeam
        anchor_features = list(enc_feats)
        anchor_max_len = len(anchor_features)
        assert distrib_anchor_beam.state['coverage_data'].shape[1] == anchor_max_len
        to_sample = []
        for _ in range(n_anchors_to_sample):
            anchor_len = np.random.randint(0, anchor_max_len)
            anchor = np.random.choice(anchor_features,
                                      anchor_len,
                                      replace=False)
            to_sample.append(tuple(anchor))
        to_sample = list(set(to_sample))
        current_state = deepcopy(distrib_anchor_beam.state)
        pos, total = distrib_anchor_beam.draw_samples(to_sample, batch_size)
        for p, t, anchor in zip(pos, total, to_sample):
            assert distrib_anchor_beam.state['t_nsamples'][anchor] == current_state['t_nsamples'][anchor] + t
            assert distrib_anchor_beam.state['t_positives'][anchor] == current_state['t_positives'][anchor] + p

        ray.shutdown()
Example #9
def atext_explainer(lr_classifier, english_spacy_model, movie_sentiment_data):
    predictor = predict_fcn(predict_type='class',
                            clf=lr_classifier,
                            preproc=movie_sentiment_data['preprocessor'])
    atext = AnchorText(nlp=english_spacy_model, predictor=predictor)
    return atext
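
Unlike the tabular fixtures, this text explainer is returned without being exercised in the examples above. A minimal sketch of how it might be used on a raw movie review, assuming movie_sentiment_data exposes an 'X_test' list of raw strings (that key, the threshold and the test name are illustrative assumptions):

def test_anchor_text_movie(atext_explainer, movie_sentiment_data):
    # hypothetical usage of the atext_explainer fixture on a single raw review
    text = movie_sentiment_data['X_test'][0]  # assumed key holding raw review strings

    explanation = atext_explainer.explain(text, threshold=0.95, use_unk=True)
    # the anchor is the list of words that (locally) pins down the sentiment prediction
    assert explanation.precision >= 0.95
    assert isinstance(explanation.anchor, list)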