def setUp(self):
    """Create an explainer on the first 100 test rows plus lookup callbacks.

    A random forest is fitted on the training data returned by
    ``titanic_survive()``. The test frame gets a fresh positional index
    converted to strings; rows ``[:100]`` are handed to the explainer
    directly, while rows ``[100:]`` are only reachable through the four
    callback functions registered at the end.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    forest = RandomForestClassifier(n_estimators=50, max_depth=4)
    forest.fit(X_train, y_train)

    # Re-label the test rows 0..n-1 and store those labels as strings.
    X_test.reset_index(drop=True, inplace=True)
    X_test.index = X_test.index.astype(str)

    X_head, y_head = X_test.iloc[:100], y_test.iloc[:100]
    X_rest, y_rest = X_test.iloc[100:], y_test.iloc[100:]

    self.explainer = ClassifierExplainer(
        forest, X_head, y_head, cats=['Sex', 'Deck'])

    def position_of(index):
        # Positional offset of ``index`` inside the held-out rows.
        return X_rest.index.get_loc(index)

    def index_exists_func(index):
        return index in X_rest.index

    def index_list_func():
        # only returns first 50 indexes
        return list(X_rest.index[:50])

    def y_func(index):
        # Double brackets keep the result a one-element Series.
        return y_rest.iloc[[position_of(index)]]

    def X_func(index):
        # Double brackets keep the result a one-row DataFrame.
        return X_rest.iloc[[position_of(index)]]

    self.explainer.set_index_exists_func(index_exists_func)
    self.explainer.set_index_list_func(index_list_func)
    self.explainer.set_X_row_func(X_func)
    self.explainer.set_y_func(y_func)
def generate_assets():
    """Write explainer and dashboard assets into ``<cwd>/tests/cli_assets``.

    Fits a small random forest, wraps it in a ``ClassifierExplainer`` and an
    ``ExplainerDashboard`` with three tabs, then stores:

    * ``explainer.joblib`` via ``explainer.dump``
    * ``explainer.yaml`` via ``explainer.to_yaml``
    * ``dashboard.yaml`` via ``dashboard.to_yaml`` (pointing at the joblib file)

    Returns:
        None
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    model = RandomForestClassifier(n_estimators=5, max_depth=2).fit(
        X_train, y_train)

    cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    explainer = ClassifierExplainer(
        model, X_test, y_test,
        cats=cats,
        labels=['Not survived', 'Survived'])

    # Tabs are given as an instance, a class, and a string identifier.
    tabs = [
        ShapDependenceComposite(explainer, title="Test Tab!"),
        ShapDependenceComposite,
        "importances",
    ]
    dashboard = ExplainerDashboard(explainer, tabs, title="Test Title!")

    asset_dir = Path.cwd() / "tests" / "cli_assets"
    joblib_path = asset_dir / "explainer.joblib"

    explainer.dump(joblib_path)
    explainer.to_yaml(asset_dir / "explainer.yaml")
    dashboard.to_yaml(asset_dir / "dashboard.yaml",
                      explainerfile=str(joblib_path))
def setUp(self):
    """Build model, explainer and dashboard, and write them to ``tests/cli_assets``.

    Stores ``self.model``, ``self.explainer``, ``self.dashboard`` and
    ``self.pkl_dir`` on the test case, then dumps the explainer (joblib and
    yaml) and the dashboard (yaml) below the current working directory.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    # The name lists are loaded but not used anywhere in this fixture.
    train_names, test_names = titanic_names()

    self.model = RandomForestClassifier(n_estimators=5, max_depth=2)
    self.model.fit(X_train, y_train)

    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = ClassifierExplainer(
        self.model, X_test, y_test,
        cats=grouped_cats,
        labels=['Not survived', 'Survived'])

    # Tabs are given as an instance, a class, and a string identifier.
    tab_specs = [
        ShapDependenceTab(self.explainer, title="Test Tab!"),
        ShapDependenceTab,
        "importances",
    ]
    self.dashboard = ExplainerDashboard(
        self.explainer, tab_specs, title="Test Title!")

    self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
    joblib_path = self.pkl_dir / "explainer.joblib"

    self.explainer.dump(joblib_path)
    self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
    self.dashboard.to_yaml(self.pkl_dir / "dashboard.yaml",
                           explainerfile=str(joblib_path))
def setUp(self):
    """Create ``self.explainer`` with ``roc_auc_score`` and ``n_jobs=-1``.

    A small random forest is fitted on the training data and explained on
    the test data; ``roc_auc_score`` is passed as the fourth positional
    argument of ``ClassifierExplainer``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    # The name lists are loaded but not used anywhere in this fixture.
    train_names, test_names = titanic_names()

    forest = RandomForestClassifier(n_estimators=5, max_depth=2).fit(
        X_train, y_train)

    self.explainer = ClassifierExplainer(
        forest, X_test, y_test, roc_auc_score, n_jobs=-1)
def setUp(self):
    """Create ``self.explainer`` on the first 50 training rows with ``cv=3``.

    Unlike the other fixtures, the explainer is given a slice of the
    *training* data (``X_train.iloc[:50]`` / ``y_train.iloc[:50]``) rather
    than the test split.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    forest = RandomForestClassifier(n_estimators=5, max_depth=2).fit(
        X_train, y_train)

    X_sample = X_train.iloc[:50]
    y_sample = y_train.iloc[:50]
    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]

    self.explainer = ClassifierExplainer(
        forest, X_sample, y_sample, cats=grouped_cats, cv=3)
def setUp(self):
    """Create ``self.explainer`` as a ``RandomForestClassifierBunch``.

    The forest (50 trees, depth 5) is fitted on the training data; the bunch
    is built on the test data with ``roc_auc_score`` as fourth positional
    argument and the test names as ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    forest = RandomForestClassifier(n_estimators=50, max_depth=5).fit(
        X_train, y_train)

    bunch_options = dict(
        cats=['Sex', 'Cabin', 'Embarked'],
        idxs=test_names,
        labels=['Not survived', 'Survived'],
    )
    self.explainer = RandomForestClassifierBunch(
        forest, X_test, y_test, roc_auc_score, **bunch_options)
def setUp(self):
    """Create ``self.explainer`` around a default ``XGBClassifier``.

    The classifier is fitted on the training data and explained on the test
    data, with the three ``Sex_*`` columns grouped under ``'Gender'``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    # The name lists are loaded but not used anywhere in this fixture.
    train_names, test_names = titanic_names()

    booster = XGBClassifier().fit(X_train, y_train)

    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = ClassifierExplainer(
        booster, X_test, y_test,
        cats=grouped_cats,
        labels=['Not survived', 'Survived'])
def setUp(self):
    """Create ``self.explainer`` and keep the test names in ``self.names``.

    A small random forest is fitted on the training data and explained on
    the test data. The second element returned by ``titanic_names()`` is
    stored on the test case as ``self.names``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    # titanic_names() used to be called twice in a row, with the first
    # result never read; a single call provides the value that is kept.
    _, self.names = titanic_names()

    model = RandomForestClassifier(n_estimators=5, max_depth=2)
    model.fit(X_train, y_train)

    self.explainer = ClassifierExplainer(
        model, X_test, y_test,
        cats=['Sex', 'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'])
def setUp(self):
    """Create ``self.explainer`` around a CatBoost model with ``shap='tree'``.

    The classifier (100 iterations, learning rate 0.1, silent) is fitted on
    the training data; the explainer receives ``roc_auc_score`` as fourth
    positional argument and the test names as ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    booster = CatBoostClassifier(
        iterations=100, learning_rate=0.1, verbose=0,
    ).fit(X_train, y_train)

    explainer_options = dict(
        shap='tree',
        cats=['Sex', 'Cabin', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names,
    )
    self.explainer = ClassifierExplainer(
        booster, X_test, y_test, roc_auc_score, **explainer_options)
def setUp(self):
    """Create ``self.explainer`` around a default LGBM model with ``shap='tree'``.

    The classifier is fitted on the training data; the explainer receives
    ``roc_auc_score`` as fourth positional argument and the test names as
    ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    booster = LGBMClassifier().fit(X_train, y_train)

    explainer_options = dict(
        shap='tree',
        cats=['Sex', 'Cabin', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names,
    )
    self.explainer = ClassifierExplainer(
        booster, X_test, y_test, roc_auc_score, **explainer_options)
def setUp(self):
    """Create ``self.explainer`` around a CatBoost model with grouped cats.

    The classifier (100 iterations, learning rate 0.1, silent) is fitted on
    the training data and explained on the test data, with the three
    ``Sex_*`` columns grouped under ``'Gender'`` and the test names as
    ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    booster = CatBoostClassifier(
        iterations=100, learning_rate=0.1, verbose=0,
    ).fit(X_train, y_train)

    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = ClassifierExplainer(
        booster, X_test, y_test,
        cats=grouped_cats,
        labels=['Not survived', 'Survived'],
        idxs=test_names)
def setUp(self):
    """Create ``self.explainer`` around logistic regression with ``shap='linear'``.

    A default ``LogisticRegression`` is fitted on the training data and
    explained on the test data, using the test names as ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    logreg = LogisticRegression().fit(X_train, y_train)

    explainer_options = dict(
        shap='linear',
        cats=['Sex', 'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names,
    )
    self.explainer = ClassifierExplainer(
        logreg, X_test, y_test, **explainer_options)
def get_catboost_classifier():
    """Return an explainer for a CatBoost model trained with ``cat_features``.

    Works in two stages:

    1. Fit a CatBoost model on the raw training data and wrap it in a
       ``ClassifierExplainer`` so that its ``X_merged`` and ``y`` attributes
       can be read back.
    2. Fit a second, 5-iteration CatBoost model on that ``X_merged`` frame,
       declaring columns 5, 6 and 7 as ``cat_features``, and build the final
       explainer from it.

    Returns:
        The second ``ClassifierExplainer``, after
        ``calculate_properties(include_interactions=False)`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    # Stage 1: explainer on the raw columns, used only to obtain X_merged / y.
    base_model = CatBoostClassifier(iterations=100, verbose=0)
    base_model.fit(X_train, y_train)
    base_explainer = ClassifierExplainer(
        base_model, X_test, y_test,
        cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
              'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names)

    X_cats = base_explainer.X_merged
    y_cats = base_explainer.y.astype("int")

    # Stage 2: model trained directly on the merged frame.
    # NOTE(review): positions 5-7 are presumably the merged categorical
    # columns; confirm against the layout of X_merged.
    cat_model = CatBoostClassifier(iterations=5, verbose=0)
    cat_model.fit(X_cats, y_cats, cat_features=[5, 6, 7])

    explainer = ClassifierExplainer(
        cat_model, X_cats, y_cats, idxs=X_test.index)
    explainer.calculate_properties(include_interactions=False)
    return explainer
def setUp(self):
    """Create ``self.explainer`` with an explicit ``target='Survival'``.

    A small random forest is fitted on the training data and explained on
    the test data, with the three ``Sex_*`` columns grouped under
    ``'Gender'`` and the test names as ``idxs``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    forest = RandomForestClassifier(n_estimators=5, max_depth=2).fit(
        X_train, y_train)

    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = ClassifierExplainer(
        forest, X_test, y_test,
        cats=grouped_cats,
        target='Survival',
        labels=['Not survived', 'Survived'],
        idxs=test_names)
def setUp(self):
    """Create ``self.explainer`` with ``shap='kernel'`` on 20 test rows.

    A default ``LogisticRegression`` is fitted on the training data. The
    explainer is limited to the first 20 test rows and uses
    ``shap.sample(X_train, 5)`` as ``X_background``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    # The name lists are loaded but not used anywhere in this fixture.
    train_names, test_names = titanic_names()

    logreg = LogisticRegression().fit(X_train, y_train)

    X_subset = X_test.iloc[:20]
    y_subset = y_test.iloc[:20]
    background = shap.sample(X_train, 5)
    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]

    self.explainer = ClassifierExplainer(
        logreg, X_subset, y_subset,
        shap='kernel',
        model_output='probability',
        X_background=background,
        cats=grouped_cats,
        labels=['Not survived', 'Survived'])
def get_classification_explainer(include_y=True):
    """Return a ``ClassifierExplainer`` for an XGBoost model.

    Args:
        include_y: When truthy, ``y_test`` is passed to the explainer right
            after ``X_test``; otherwise only ``X_test`` is passed.

    Returns:
        The explainer, after ``calculate_properties()`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    booster = XGBClassifier()
    booster.fit(X_train, y_train)

    # Positional data arguments: y is simply left out when not requested,
    # so the constructor call matches the one-branch-per-case form exactly.
    data_args = (X_test, y_test) if include_y else (X_test,)

    explainer = ClassifierExplainer(
        booster, *data_args,
        cats=['Sex', 'Cabin', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names)
    explainer.calculate_properties()
    return explainer
def get_classification_explainer(xgboost=False, include_y=True):
    """Return a ``ClassifierExplainer`` for an XGBoost or random-forest model.

    Args:
        xgboost: When truthy, fit a default ``XGBClassifier``; otherwise fit
            a ``RandomForestClassifier`` with 50 trees and depth 10.
        include_y: When truthy, ``y_test`` is passed to the explainer right
            after ``X_test``; otherwise only ``X_test`` is passed.

    Returns:
        The explainer, after ``calculate_properties()`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    estimator = (
        XGBClassifier()
        if xgboost
        else RandomForestClassifier(n_estimators=50, max_depth=10)
    )
    fitted = estimator.fit(X_train, y_train)

    # Positional data arguments: y is simply left out when not requested,
    # so the constructor call matches the one-branch-per-case form exactly.
    data_args = (X_test, y_test) if include_y else (X_test,)

    explainer = ClassifierExplainer(
        fitted, *data_args,
        cats=['Sex', 'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'])
    explainer.calculate_properties()
    return explainer
# The dashboard has to be served at http://127.0.0.1:8050/ to work; whether it
# can be made to work with Streamlit is still to be figured out.

# Import libraries here
from sklearn.ensemble import RandomForestClassifier

from explainerdashboard import ClassifierExplainer, ExplainerDashboard
from explainerdashboard.datasets import titanic_survive, feature_descriptions

X_train, y_train, X_test, y_test = titanic_survive()

# Fit the classifier that the dashboard will explain.
model = RandomForestClassifier(n_estimators=50, max_depth=10)
model.fit(X_train, y_train)

explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

# Build the dashboard and start serving it.
ExplainerDashboard(explainer).run()
def test_explainer_len(self):
    """``len(self.explainer)`` must equal the length of the third ``titanic_survive()`` item."""
    _, _, X_test, _ = titanic_survive()
    expected_rows = len(X_test)
    self.assertEqual(len(self.explainer), expected_rows)