def setUp(self):
        """Create an explainer on part of the test data with index callbacks.

        A random forest is fitted on the training split returned by
        ``titanic_survive()``. The explainer itself is built on the first
        100 test rows; the remaining rows are only reachable through the
        four callback functions registered on the explainer at the end.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        forest = RandomForestClassifier(n_estimators=50, max_depth=4)
        forest.fit(X_train, y_train)

        # Replace the index with a fresh default one, then cast it to str.
        X_test.reset_index(drop=True, inplace=True)
        X_test.index = X_test.index.astype(str)

        split_at = 100
        X_head, y_head = X_test.iloc[:split_at], y_test.iloc[:split_at]
        X_tail, y_tail = X_test.iloc[split_at:], y_test.iloc[split_at:]

        self.explainer = ClassifierExplainer(
            forest, X_head, y_head, cats=['Sex', 'Deck'])

        def _position(index):
            # Translate an index label of the held-back rows to its position.
            return X_tail.index.get_loc(index)

        def index_exists_func(index):
            return index in X_tail.index

        def index_list_func():
            # only returns first 50 indexes
            return list(X_tail.index[:50])

        def y_func(index):
            # The list selector keeps the result a one-element selection.
            return y_tail.iloc[[_position(index)]]

        def X_func(index):
            return X_tail.iloc[[_position(index)]]

        self.explainer.set_index_exists_func(index_exists_func)
        self.explainer.set_index_list_func(index_list_func)
        self.explainer.set_X_row_func(X_func)
        self.explainer.set_y_func(y_func)
Ejemplo n.º 2
0
def generate_assets():
    """Fit a small model and write explainer/dashboard files to disk.

    Builds a ``ClassifierExplainer`` and an ``ExplainerDashboard`` from the
    data returned by ``titanic_survive()`` and stores three files under
    ``<cwd>/tests/cli_assets``: ``explainer.joblib``, ``explainer.yaml``
    and ``dashboard.yaml``.

    Returns:
        None
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    model = RandomForestClassifier(n_estimators=5, max_depth=2)
    model.fit(X_train, y_train)

    grouped_cats = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    explainer = ClassifierExplainer(
        model, X_test, y_test,
        cats=grouped_cats,
        labels=['Not survived', 'Survived'],
    )

    # Tabs are given as an instance, a class and a string.
    tabs = [
        ShapDependenceComposite(explainer, title="Test Tab!"),
        ShapDependenceComposite,
        "importances",
    ]
    dashboard = ExplainerDashboard(explainer, tabs, title="Test Title!")

    asset_dir = Path.cwd() / "tests" / "cli_assets"
    joblib_path = asset_dir / "explainer.joblib"
    explainer.dump(joblib_path)
    explainer.to_yaml(asset_dir / "explainer.yaml")
    # The dashboard yaml records the path of the dumped explainer as a str.
    dashboard.to_yaml(asset_dir / "dashboard.yaml",
                      explainerfile=str(joblib_path))
Ejemplo n.º 3
0
    def setUp(self):
        """Build model, explainer and dashboard, then write them to disk.

        Stores the fitted model, the explainer, the dashboard and the asset
        directory on ``self`` and writes ``explainer.joblib``,
        ``explainer.yaml`` and ``dashboard.yaml`` under
        ``<cwd>/tests/cli_assets``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        # The name lists are loaded here but not used by this fixture.
        train_names, test_names = titanic_names()

        forest = RandomForestClassifier(n_estimators=5, max_depth=2)
        forest.fit(X_train, y_train)
        self.model = forest

        cat_groups = [
            {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
            'Deck',
            'Embarked',
        ]
        self.explainer = ClassifierExplainer(
            self.model, X_test, y_test,
            cats=cat_groups,
            labels=['Not survived', 'Survived'])

        # Tabs are given as an instance, a class and a string.
        tab_specs = [
            ShapDependenceTab(self.explainer, title="Test Tab!"),
            ShapDependenceTab,
            "importances",
        ]
        self.dashboard = ExplainerDashboard(
            self.explainer, tab_specs, title="Test Title!")

        self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
        explainer_file = self.pkl_dir / "explainer.joblib"
        self.explainer.dump(explainer_file)
        self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
        self.dashboard.to_yaml(self.pkl_dir / "dashboard.yaml",
                               explainerfile=str(explainer_file))
    def setUp(self):
        """Create ``self.explainer`` from a small random forest.

        ``roc_auc_score`` is passed as the fourth positional argument and
        ``n_jobs=-1`` as a keyword to ``ClassifierExplainer``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        # The name lists are loaded here but not used by this fixture.
        train_names, test_names = titanic_names()

        forest = RandomForestClassifier(n_estimators=5, max_depth=2)
        forest.fit(X_train, y_train)

        explainer_args = (forest, X_test, y_test, roc_auc_score)
        self.explainer = ClassifierExplainer(*explainer_args, n_jobs=-1)
Ejemplo n.º 5
0
    def setUp(self):
        """Create ``self.explainer`` on the first 50 training rows with ``cv=3``.

        The model is fitted on the full training split; the explainer only
        receives the first 50 rows of that same split.
        """
        X_train, y_train, X_test, y_test = titanic_survive()

        forest = RandomForestClassifier(n_estimators=5, max_depth=2)
        forest.fit(X_train, y_train)

        n_rows = 50
        X_sample = X_train.iloc[:n_rows]
        y_sample = y_train.iloc[:n_rows]
        cat_groups = [
            {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
            'Deck',
            'Embarked',
        ]

        self.explainer = ClassifierExplainer(
            forest, X_sample, y_sample, cats=cat_groups, cv=3)
Ejemplo n.º 6
0
    def setUp(self):
        """Create ``self.explainer`` as a ``RandomForestClassifierBunch``.

        The bunch wraps a 50-tree random forest (max depth 5) together with
        the test split, ``roc_auc_score`` (fourth positional argument), the
        test names as ``idxs`` and human-readable class labels.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        forest = RandomForestClassifier(n_estimators=50, max_depth=5)
        forest.fit(X_train, y_train)

        options = dict(
            cats=['Sex', 'Cabin', 'Embarked'],
            idxs=test_names,
            labels=['Not survived', 'Survived'],
        )
        self.explainer = RandomForestClassifierBunch(
            forest, X_test, y_test, roc_auc_score, **options)
Ejemplo n.º 7
0
    def setUp(self):
        """Create ``self.explainer`` around a default ``XGBClassifier``.

        The explainer is built on the test split with grouped categorical
        columns and class labels.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        # The name lists are loaded here but not used by this fixture.
        train_names, test_names = titanic_names()

        booster = XGBClassifier()
        booster.fit(X_train, y_train)

        gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
        self.explainer = ClassifierExplainer(
            booster,
            X_test,
            y_test,
            cats=[gender_group, 'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'],
        )
Ejemplo n.º 8
0
    def setUp(self):
        """Create ``self.explainer`` and store the test names on ``self.names``.

        ``self.names`` is the second item returned by ``titanic_names()``.
        The names are loaded with a single call; the fixture previously
        called ``titanic_names()`` twice and discarded the first result.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        _, self.names = titanic_names()

        model = RandomForestClassifier(n_estimators=5, max_depth=2)
        model.fit(X_train, y_train)

        self.explainer = ClassifierExplainer(
                            model, X_test, y_test, 
                            cats=['Sex', 'Deck', 'Embarked'],
                            labels=['Not survived', 'Survived'])
    def setUp(self):
        """Create ``self.explainer`` around a CatBoost model with ``shap='tree'``.

        ``roc_auc_score`` is passed as the fourth positional argument and
        the test names are passed as ``idxs``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        booster = CatBoostClassifier(
            iterations=100, learning_rate=0.1, verbose=0)
        booster.fit(X_train, y_train)

        options = {
            'shap': 'tree',
            'cats': ['Sex', 'Cabin', 'Embarked'],
            'labels': ['Not survived', 'Survived'],
            'idxs': test_names,
        }
        self.explainer = ClassifierExplainer(
            booster, X_test, y_test, roc_auc_score, **options)
    def setUp(self):
        """Create ``self.explainer`` around a default LightGBM model.

        The explainer is built on the test split with ``shap='tree'``,
        ``roc_auc_score`` as the fourth positional argument and the test
        names as ``idxs``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        booster = LGBMClassifier()
        booster.fit(X_train, y_train)

        categorical = ['Sex', 'Cabin', 'Embarked']
        class_labels = ['Not survived', 'Survived']
        self.explainer = ClassifierExplainer(
            booster, X_test, y_test, roc_auc_score,
            shap='tree',
            cats=categorical,
            labels=class_labels,
            idxs=test_names,
        )
Ejemplo n.º 11
0
    def setUp(self):
        """Create ``self.explainer`` around a CatBoost model with grouped cats.

        The ``cats`` argument mixes a dict that groups three ``Sex_*``
        columns under ``'Gender'`` with two plain column names; the test
        names are passed as ``idxs``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        booster = CatBoostClassifier(
            iterations=100, learning_rate=0.1, verbose=0)
        booster.fit(X_train, y_train)

        gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
        self.explainer = ClassifierExplainer(
            booster, X_test, y_test,
            cats=[gender_group, 'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'],
            idxs=test_names,
        )
Ejemplo n.º 12
0
    def setUp(self):
        """Create ``self.explainer`` around a logistic regression.

        The explainer is built on the test split with ``shap='linear'`` and
        the test names as ``idxs``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        regression = LogisticRegression()
        regression.fit(X_train, y_train)

        options = dict(
            shap='linear',
            cats=['Sex', 'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'],
            idxs=test_names,
        )
        self.explainer = ClassifierExplainer(
            regression, X_test, y_test, **options)
def get_catboost_classifier():
    """Return an explainer for a CatBoost model trained on merged categories.

    Works in two stages: a first explainer is built only to obtain its
    ``X_merged`` frame and integer-cast ``y``; a second, smaller CatBoost
    model is then fitted on that data and wrapped in the explainer that is
    returned.

    Returns:
        The second ``ClassifierExplainer``, after
        ``calculate_properties(include_interactions=False)`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    # Stage 1: explainer on the original columns, used as a data source.
    base_model = CatBoostClassifier(iterations=100, verbose=0)
    base_model.fit(X_train, y_train)
    base_explainer = ClassifierExplainer(
        base_model, X_test, y_test,
        cats=[
            {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
            'Deck',
            'Embarked',
        ],
        labels=['Not survived', 'Survived'],
        idxs=test_names,
    )

    X_cats = base_explainer.X_merged
    y_cats = base_explainer.y.astype("int")

    # Stage 2: refit on the merged frame.
    # NOTE(review): columns 5, 6, 7 are presumably the merged categorical
    # columns of X_merged -- confirm against the explainer output.
    cat_model = CatBoostClassifier(iterations=5, verbose=0)
    cat_model.fit(X_cats, y_cats, cat_features=[5, 6, 7])

    result = ClassifierExplainer(cat_model, X_cats, y_cats, idxs=X_test.index)
    result.calculate_properties(include_interactions=False)
    return result
    def setUp(self):
        """Create ``self.explainer`` with an explicit ``target='Survival'``.

        A small random forest is fitted on the training split and the
        explainer is built on the test split, with grouped categorical
        columns, class labels and the test names as ``idxs``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        forest = RandomForestClassifier(n_estimators=5, max_depth=2)
        forest.fit(X_train, y_train)

        gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
        options = dict(
            cats=[gender_group, 'Deck', 'Embarked'],
            target='Survival',
            labels=['Not survived', 'Survived'],
            idxs=test_names,
        )
        self.explainer = ClassifierExplainer(forest, X_test, y_test, **options)
Ejemplo n.º 15
0
    def setUp(self):
        """Create ``self.explainer`` with ``shap='kernel'`` on 20 test rows.

        A logistic regression is fitted on the training split. The explainer
        receives the first 20 test rows, ``model_output='probability'`` and
        a background set produced by ``shap.sample(X_train, 5)``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()
        train_names, test_names = titanic_names()

        regression = LogisticRegression()
        regression.fit(X_train, y_train)

        n_rows = 20
        X_subset = X_test.iloc[:n_rows]
        y_subset = y_test.iloc[:n_rows]
        background = shap.sample(X_train, 5)
        gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}

        self.explainer = ClassifierExplainer(
            regression, X_subset, y_subset,
            shap='kernel',
            model_output='probability',
            X_background=background,
            cats=[gender_group, 'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'],
        )
def get_classification_explainer(include_y=True):
    """Return a ``ClassifierExplainer`` for a default ``XGBClassifier``.

    Args:
        include_y: When true, ``y_test`` is passed to the explainer along
            with ``X_test``; otherwise only ``X_test`` is passed.

    Returns:
        The explainer, after ``calculate_properties()`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()

    booster = XGBClassifier()
    booster.fit(X_train, y_train)

    # Positional data arguments differ only in whether y is present.
    data_args = (X_test, y_test) if include_y else (X_test,)
    result = ClassifierExplainer(
        booster, *data_args,
        cats=['Sex', 'Cabin', 'Embarked'],
        labels=['Not survived', 'Survived'],
        idxs=test_names,
    )

    result.calculate_properties()
    return result
def get_classification_explainer(xgboost=False, include_y=True):
    """Return a ``ClassifierExplainer`` for an XGBoost or random forest model.

    Args:
        xgboost: When true, a default ``XGBClassifier`` is fitted; otherwise
            a 50-tree ``RandomForestClassifier`` with ``max_depth=10``.
        include_y: When true, ``y_test`` is passed to the explainer along
            with ``X_test``; otherwise only ``X_test`` is passed.

    Returns:
        The explainer, after ``calculate_properties()`` has been called.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    if xgboost:
        estimator = XGBClassifier()
    else:
        estimator = RandomForestClassifier(n_estimators=50, max_depth=10)
    estimator.fit(X_train, y_train)

    shared_options = dict(
        cats=['Sex', 'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'],
    )
    if include_y:
        result = ClassifierExplainer(
            estimator, X_test, y_test, **shared_options)
    else:
        result = ClassifierExplainer(estimator, X_test, **shared_options)

    result.calculate_properties()
    return result
Ejemplo n.º 18
0
# Must be run at http://127.0.0.1:8050/ to work; still need to figure out whether it can work with Streamlit.

# Import libraries here
from sklearn.ensemble import RandomForestClassifier

from explainerdashboard import ClassifierExplainer, ExplainerDashboard
from explainerdashboard.datasets import titanic_survive, feature_descriptions

# Load the train/test splits.
X_train, y_train, X_test, y_test = titanic_survive()

# Fit a 50-tree random forest with max depth 10 on the training split.
model = RandomForestClassifier(n_estimators=50, max_depth=10)
model.fit(X_train, y_train)

# Wrap the fitted model and the test split in an explainer.
explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

# Build the dashboard and run it; this executes as soon as the module runs.
ExplainerDashboard(explainer).run()
 def test_explainer_len(self):
     # len(explainer) must equal the length of the third item returned by
     # titanic_survive() (unpacked as X_test elsewhere in these fixtures).
     explainer_length = len(self.explainer)
     test_features = titanic_survive()[2]
     self.assertEqual(explainer_length, len(test_features))