def kshap_explainer(lr_classifier, adult_data):
    """
    Instantiates a KernelShap explainer with a logit link around a
    probability predictor and fits it on the first 100 training rows
    found in `adult_data['X_train']`.
    """
    proba_fn = predict_fcn(
        predict_type='proba',
        clf=lr_classifier,
        preproc=adult_data['preprocessor'],
    )
    names = adult_data['metadata']['feature_names']
    explainer = KernelShap(predictor=proba_fn, link='logit', feature_names=names)

    # only a small slice of the training data is used to fit the explainer
    background = adult_data['X_train'][:100]
    explainer.fit(background)

    return explainer
def atab_explainer(lr_classifier, adult_data):
    """
    Instantiates an AnchorTabular explainer around a class predictor and
    fits it on `adult_data['X_train']` with quartile discretisation.
    """
    class_fn = predict_fcn(
        predict_type='class',
        clf=lr_classifier,
        preproc=adult_data['preprocessor'],
    )
    metadata = adult_data['metadata']
    explainer = AnchorTabular(
        predictor=class_fn,
        feature_names=metadata['feature_names'],
        categorical_names=metadata['category_map'],
    )
    explainer.fit(adult_data['X_train'], disc_perc=(25, 50, 75))

    return explainer
def at_iris_explainer(get_iris_dataset, rf_classifier, request):
    """
    Builds and fits an AnchorTabular explainer on the dataset supplied by
    `get_iris_dataset`.

    The prediction type is read from `request.param`. Returns a tuple
    `(X_test, explainer, predict_function, predict_type)`.
    """
    kind = request.param
    dataset = get_iris_dataset
    model, _ = rf_classifier  # second element (preprocessor) is not needed here

    # wrap the classifier, then create and fit the explainer
    predict = predict_fcn(kind, model)
    anchor_explainer = AnchorTabular(predict, dataset['metadata']['feature_names'])
    anchor_explainer.fit(dataset['X_train'], disc_perc=(25, 50, 75))

    return dataset['X_test'], anchor_explainer, predict, kind
def test_save_KernelShap(kshap_explainer, lr_classifier, adult_data):
    """
    Checks that a fitted KernelShap explainer survives a save/load round trip.

    The explainer is saved to a temporary directory, loaded back with a
    freshly built predictor, and the loaded object is checked for:
    its type, its `meta` matching the original explainer's `meta`, and the
    `meta` of an explanation it produces matching the `meta` of an
    explanation produced before saving.
    """
    predictor = predict_fcn(
        predict_type='proba',
        clf=lr_classifier,
        preproc=adult_data['preprocessor'],
    )
    X = adult_data['X_test'][:2]

    # reference explanation from the original (never saved) explainer
    exp0 = kshap_explainer.explain(X)

    with tempfile.TemporaryDirectory() as temp_dir:
        kshap_explainer.save(temp_dir)
        kshap_explainer1 = load_explainer(temp_dir, predictor=predictor)

        assert isinstance(kshap_explainer1, KernelShap)
        assert kshap_explainer.meta == kshap_explainer1.meta

        # explain with the *loaded* explainer; using the original one here
        # would make the comparison below trivially true
        exp1 = kshap_explainer1.explain(X)
        assert exp0.meta == exp1.meta
def test_save_AnchorTabular(atab_explainer, lr_classifier, adult_data):
    """
    Round-trips a fitted AnchorTabular explainer through save/load and
    compares the metadata of the explainer and of its explanations before
    and after.
    """
    instance = adult_data['X_test'][0]
    class_fn = predict_fcn(
        predict_type='class',
        clf=lr_classifier,
        preproc=adult_data['preprocessor'],
    )

    # explanation produced before the explainer is saved
    before = atab_explainer.explain(instance)

    with tempfile.TemporaryDirectory() as save_dir:
        atab_explainer.save(save_dir)
        restored = load_explainer(save_dir, predictor=class_fn)

        assert isinstance(restored, AnchorTabular)
        assert atab_explainer.meta == restored.meta

        # explanation produced by the reloaded explainer
        after = restored.explain(instance)
        assert before.meta == after.meta
def at_adult_explainer(get_adult_dataset, rf_classifier, request):
    """
    Builds and fits an AnchorTabular explainer on the dataset supplied by
    `get_adult_dataset`, including its categorical feature mapping.

    The prediction type is read from `request.param`. Returns a tuple
    `(X_test, explainer, predict_function, predict_type)`.
    """
    kind = request.param
    dataset = get_adult_dataset
    model, preproc = rf_classifier

    # wrap classifier + preprocessor into a single prediction function
    predict = predict_fcn(kind, model, preproc)

    metadata = dataset['metadata']
    anchor_explainer = AnchorTabular(
        predict,
        metadata['feature_names'],
        categorical_names=metadata['category_map'],
    )
    anchor_explainer.fit(dataset['X_train'], disc_perc=(25, 50, 75))

    return dataset['X_test'], anchor_explainer, predict, kind
def test_anchor_text(lr_classifier, text, n_punctuation_marks, n_unique_words,
                     predict_type, anchor, use_similarity_proba, use_unk,
                     threshold):
    """
    Exercises AnchorText: initialisation, sampler set-up, the sampling
    function for a given `anchor`, and the final explanation for `text`.
    """
    # fixed test settings
    n_samples = 100
    top_n = 500
    n_covered_ex = 5  # number of examples where the anchor applies to be returned
    perturb_opts = {
        'use_similarity_proba': use_similarity_proba,
        'sample_proba': .5,
        'temperature': 1.,
    }

    # build the prediction function from the fitted classifier
    model, preproc = lr_classifier
    predictor = predict_fcn(predict_type, model, preproc=preproc)

    # explainer initialisation
    explainer = AnchorText(nlp, predictor)
    assert explainer.predictor(['book']).shape == (1,)

    # configure the explainer by hand, as explain() would
    explainer.n_covered_ex = n_covered_ex
    explainer.set_words_and_pos(text)
    explainer.set_sampler_perturbation(use_unk, perturb_opts, top_n)
    explainer.set_data_type(use_unk)
    if predict_type == 'proba':
        label = np.argmax(predictor([text])[0])
    elif predict_type == 'class':
        label = predictor([text])[0]
    explainer.instance_label = label

    assert isinstance(explainer.dtype, str)
    assert len(explainer.punctuation) == n_punctuation_marks
    assert len(explainer.words) == len(explainer.positions)

    # sampler: only the data matrix and the coverage are inspected
    _, _, _, data, coverage, _ = explainer.sampler((0, anchor), n_samples)
    if not anchor:
        assert coverage == -1
    if use_similarity_proba and len(anchor) > 0:
        # every anchor word must be present in every sampled row
        assert data[:, anchor].sum() == len(anchor) * data.shape[0]

    if use_unk:
        # unique words = words in text + UNK
        assert len(np.unique(explainer.words)) == n_unique_words

    # full explanation
    explanation = explainer.explain(
        text,
        use_unk=use_unk,
        threshold=threshold,
        use_similarity_proba=use_similarity_proba,
    )
    assert explanation.precision >= threshold
    assert explanation.raw['prediction'].item() == label
    assert explanation.meta.keys() == DEFAULT_META_ANCHOR.keys()
    assert explanation.data.keys() == DEFAULT_DATA_ANCHOR.keys()

    # sampled sentences must not be cut short
    text_without_spaces = text.replace(' ', '')
    for idx, _ in enumerate(explanation.raw['feature']):
        examples = explanation.raw['examples'][idx]
        for key in ('covered_true', 'covered_false'):
            for sentence in examples[key]:
                if use_unk:
                    # either perturbed with UNK or identical to the input
                    assert ('UNK' in sentence
                            or sentence.replace(' ', '') == text_without_spaces)
                else:
                    assert 'UNK' not in sentence
                assert sentence[-1] in ['.', 'K']
def test_distributed_anchor_tabular(
        ncpu,
        predict_type,
        at_defaults,
        iris_data,
        rf_classifier,
        test_instance_idx,
):
    """
    Exercises DistributedAnchorTabular: fits it with `ncpu` workers,
    explains one test instance, inspects each remote sampler and checks
    that `draw_samples` updates the beam-search state consistently.

    The test body only runs when `RAY_INSTALLED` is truthy; otherwise it
    returns immediately.
    """
    # `ray` is only importable when it is installed, and everything below
    # depends on it
    if not RAY_INSTALLED:
        return

    import ray

    try:
        # inputs
        params = at_defaults
        threshold = params['desired_confidence']
        n_covered_ex = params['n_covered_ex']  # number of covered examples to return when anchor applies
        batch_size = params['batch_size']  # number of samples to draw during sampling
        n_anchors_to_sample = 6  # for testing sampling function

        # prepare the classifier and explainer
        data = iris_data
        X_test, X_train = data['X_test'], data['X_train']
        feature_names = data['metadata']['feature_names']
        clf, _ = rf_classifier  # second element (preprocessor) is not used
        predictor = predict_fcn(predict_type, clf)
        explainer = DistributedAnchorTabular(predictor, feature_names, seed=0)
        explainer.fit(X_train, ncpu=ncpu)

        # select instance to be explained
        instance = X_test[test_instance_idx]
        if predict_type == 'proba':
            instance_label = np.argmax(predictor(instance.reshape(1, -1)), axis=1)
        else:
            instance_label = predictor(instance.reshape(1, -1))[0]

        # explain the instance and do basic checks on the lookups and
        # instance labels used by samplers
        explanation = explainer.explain(instance, threshold=threshold, n_covered_ex=n_covered_ex)
        assert len(explainer.samplers) == ncpu
        for actor in explainer.samplers:
            sampler = ray.get(actor._get_sampler.remote())
            ord_feats = sampler.ord_lookup.keys()
            cat_feats = sampler.cat_lookup.keys()
            enc_feats = sampler.enc2feat_idx.keys()
            assert set(ord_feats) | set(cat_feats) == set(enc_feats)
            assert sampler.instance_label == instance_label
            assert sampler.n_covered_ex == n_covered_ex

        # check explanation
        assert explainer.instance_label == instance_label
        assert explanation.precision >= threshold
        assert explanation.coverage >= 0.05

        distrib_anchor_beam = explainer.mab
        assert len(distrib_anchor_beam.samplers) == ncpu

        # basic checks for DistributedAnchorBaseBeam; `enc_feats` is the
        # encoded-feature lookup of the last sampler inspected above
        anchor_features = list(enc_feats)
        anchor_max_len = len(anchor_features)
        assert distrib_anchor_beam.state['coverage_data'].shape[1] == anchor_max_len

        # draw random candidate anchors; duplicates are removed so each
        # anchor's counters are compared exactly once
        to_sample = []
        for _ in range(n_anchors_to_sample):
            anchor_len = np.random.randint(0, anchor_max_len)
            anchor = np.random.choice(anchor_features, anchor_len, replace=False)
            to_sample.append(tuple(anchor))
        to_sample = list(set(to_sample))

        # snapshot the state so the increments made by draw_samples can be verified
        current_state = deepcopy(distrib_anchor_beam.state)
        pos, total = distrib_anchor_beam.draw_samples(to_sample, batch_size)
        for p, t, anchor in zip(pos, total, to_sample):
            assert distrib_anchor_beam.state['t_nsamples'][anchor] == current_state['t_nsamples'][anchor] + t
            assert distrib_anchor_beam.state['t_positives'][anchor] == current_state['t_positives'][anchor] + p
    finally:
        # always release ray, even when an assertion above fails, so that
        # a failure here does not leak a running ray instance into other tests
        ray.shutdown()
def atext_explainer(lr_classifier, english_spacy_model, movie_sentiment_data):
    """
    Instantiates an AnchorText explainer from the given spaCy model and a
    class predictor built from `lr_classifier` and the preprocessor stored
    in `movie_sentiment_data`.
    """
    class_fn = predict_fcn(
        predict_type='class',
        clf=lr_classifier,
        preproc=movie_sentiment_data['preprocessor'],
    )
    return AnchorText(nlp=english_spacy_model, predictor=class_fn)