Esempio n. 1
0
def generate_assets():
    """Fit a small titanic classifier and write explainer/dashboard assets to disk.

    Trains a ``RandomForestClassifier`` on the ``titanic_survive()`` data,
    wraps it in a ``ClassifierExplainer`` and an ``ExplainerDashboard``, and
    writes ``explainer.joblib``, ``explainer.yaml`` and ``dashboard.yaml``
    into ``<current working directory>/tests/cli_assets``.

    Returns:
        None
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    model = RandomForestClassifier(n_estimators=5, max_depth=2)
    model.fit(X_train, y_train)

    explainer = ClassifierExplainer(
        model, X_test, y_test,
        cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
              'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'])

    # The tab list mixes an instantiated component, a component class and a
    # tab name given as a string.
    tabs = [
        ShapDependenceComposite(explainer, title="Test Tab!"),
        ShapDependenceComposite,
        "importances",
    ]
    dashboard = ExplainerDashboard(explainer, tabs, title="Test Title!")

    pkl_dir = Path.cwd() / "tests" / "cli_assets"
    # Create the target directory first so the dumps below do not fail when
    # the function is run from a fresh checkout/working directory.
    pkl_dir.mkdir(parents=True, exist_ok=True)

    explainer_file = pkl_dir / "explainer.joblib"
    explainer.dump(explainer_file)
    explainer.to_yaml(pkl_dir / "explainer.yaml")
    dashboard.to_yaml(pkl_dir / "dashboard.yaml",
                      explainerfile=str(explainer_file))
    return None
    def setUp(self):
        """Fit a small model and write the explainer/dashboard files used by the tests.

        Sets ``self.model``, ``self.explainer``, ``self.dashboard`` and
        ``self.pkl_dir``, and writes ``explainer.joblib``, ``explainer.yaml``
        and ``dashboard.yaml`` into ``<cwd>/tests/cli_assets``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()

        self.model = RandomForestClassifier(n_estimators=5, max_depth=2)
        self.model.fit(X_train, y_train)

        self.explainer = ClassifierExplainer(
            self.model, X_test, y_test,
            cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
                  'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'])

        # The tab list mixes an instantiated tab, a tab class and a tab name.
        tabs = [
            ShapDependenceTab(self.explainer, title="Test Tab!"),
            ShapDependenceTab,
            "importances",
        ]
        self.dashboard = ExplainerDashboard(
            self.explainer, tabs, title="Test Title!")

        self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
        # Make sure the asset directory exists before dumping into it.
        self.pkl_dir.mkdir(parents=True, exist_ok=True)

        explainer_file = self.pkl_dir / "explainer.joblib"
        self.explainer.dump(explainer_file)
        self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
        self.dashboard.to_yaml(self.pkl_dir / "dashboard.yaml",
                               explainerfile=str(explainer_file))
class DashboardTests(unittest.TestCase):
    """Tests for ``ExplainerDashboard`` yaml export and ``from_config`` loading."""

    def setUp(self):
        """Fit a small model and write the explainer/dashboard files used by the tests.

        Sets ``self.model``, ``self.explainer``, ``self.dashboard`` and
        ``self.pkl_dir``, and writes ``explainer.joblib``, ``explainer.yaml``
        and ``dashboard.yaml`` into ``<cwd>/tests/cli_assets``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()

        self.model = RandomForestClassifier(n_estimators=5, max_depth=2)
        self.model.fit(X_train, y_train)

        self.explainer = ClassifierExplainer(
            self.model, X_test, y_test,
            cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
                  'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'])

        # The tab list mixes an instantiated tab, a tab class and a tab name.
        tabs = [
            ShapDependenceTab(self.explainer, title="Test Tab!"),
            ShapDependenceTab,
            "importances",
        ]
        self.dashboard = ExplainerDashboard(
            self.explainer, tabs, title="Test Title!")

        self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
        # Make sure the asset directory exists before dumping into it.
        self.pkl_dir.mkdir(parents=True, exist_ok=True)

        explainer_file = self.pkl_dir / "explainer.joblib"
        self.explainer.dump(explainer_file)
        self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
        self.dashboard.to_yaml(self.pkl_dir / "dashboard.yaml",
                               explainerfile=str(explainer_file))

    def test_yaml(self):
        """``to_yaml()`` called without arguments should return a ``str``."""
        yaml_text = self.dashboard.to_yaml()
        self.assertIsInstance(yaml_text, str)

    def test_yaml_dict(self):
        """``to_yaml(return_dict=True)`` should return a dict with a "dashboard" key."""
        yaml_dict = self.dashboard.to_yaml(return_dict=True)
        self.assertIsInstance(yaml_dict, dict)
        self.assertIn("dashboard", yaml_dict)

    def test_load_config_joblib(self):
        """``from_config`` should accept an explainer file path plus a yaml path."""
        db = ExplainerDashboard.from_config(
            self.pkl_dir / "explainer.joblib",
            self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)

    def test_load_config_yaml(self):
        """``from_config`` should accept the dashboard yaml path on its own."""
        db = ExplainerDashboard.from_config(
            self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)

    def test_load_config_explainer(self):
        """``from_config`` should accept an in-memory explainer plus a yaml path."""
        db = ExplainerDashboard.from_config(
            self.explainer, self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)
        
Esempio n. 4
0
            def dashboard_exp(model, X_data, y_data):
                """Build a regression explainer dashboard for ``model`` and run it.

                Wraps ``model``, ``X_data`` and ``y_data`` in a
                ``RegressionExplainer``, configures an ``ExplainerDashboard``
                with the SANDSTONE bootstrap theme and the tab/visibility
                switches below, and calls ``.run()`` on it. Nothing is returned.
                """
                import dash_bootstrap_components as dbc

                from explainerdashboard import RegressionExplainer, ExplainerDashboard

                regression_explainer = RegressionExplainer(model, X_data, y_data)
                theme = dbc.themes.SANDSTONE

                # Which tabs are switched on.
                tab_switches = {
                    "importances": True,
                    "model_summary": False,
                    "contributions": True,
                    "whatif": True,
                    "shap_dependence": False,
                    "shap_interaction": False,
                    "decision_trees": False,
                }

                # Per-component visibility toggles (True hides the element).
                visibility = {
                    "hide_whatifindexselector": True,
                    "hide_whatifprediction": True,
                    "hide_inputeditor": False,
                    "hide_whatifcontributiongraph": False,
                    "hide_whatifcontributiontable": True,
                    "hide_whatifpdp": False,
                    "hide_predindexselector": True,
                    "hide_predictionsummary": True,
                    "hide_contributiongraph": False,
                    "hide_pdp": False,
                    "hide_contributiontable": True,
                    "hide_dropna": True,
                    "hide_range": True,
                    "hide_depth": True,
                    "hide_sort": True,
                    "hide_sample": True,  # hide sample size input on pdp component
                    "hide_gridlines": True,  # hide gridlines on pdp component
                    "hide_gridpoints": True,
                    "hide_cats_sort": True,  # hide the sorting option for categorical features
                    "hide_cutoff": True,  # hide cutoff selector on classification components
                    "hide_percentage": True,  # hide percentage toggle on classification components
                    "hide_log_x": True,  # hide x-axis logs toggle on regression plots
                    "hide_log_y": True,  # hide y-axis logs toggle on regression plots
                    "hide_ratio": True,  # hide the residuals type dropdown
                    "hide_points": True,  # hide the show violin scatter markers toggle
                    "hide_winsor": True,  # hide the winsorize input
                    "hide_wizard": True,  # hide the wizard toggle in lift curve component
                    "hide_star_explanation": True,
                }

                ExplainerDashboard(
                    regression_explainer,
                    bootstrap=theme,
                    **tab_switches,
                    **visibility,
                ).run()
Esempio n. 5
0
    # Load the fitted model; the context manager closes the file handle.
    # NOTE(review): pickle.load can execute arbitrary code — only load model
    # files that come from a trusted source.
    with open(MODELS_DIR / 'general_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    y = pd.read_csv(DATA_DIR / 'general_target.csv',
                    index_col=['Ticker']).drop(columns=['Date'])
    X = pd.read_csv(DATA_DIR / 'general_features.csv',
                    index_col=['Ticker']).drop(columns=['Date'])

    # Dashboard Explainer is fussy about Column Names
    # regex=False makes the dot a literal character on every pandas version
    # (older pandas defaulted to regex=True, where "." is a wildcard).
    X.columns = X.columns.str.replace('.', '', regex=False)
    # Apply the same dot-stripping to the booster's feature names so they
    # keep matching the renamed columns of X.
    booster = model.get_booster()
    feature_names = [x.replace('.', '') for x in booster.feature_names]
    booster.feature_names = feature_names

    explainer = RegressionExplainer(model, X, y)

    db = ExplainerDashboard(
        explainer,
        title="Stock Valuation Explainer",
        description=
        "Visit https://share.streamlit.io/gardnmi/fundamental-stock-prediction to see the model in use,",
        shap_interaction=False,
        precision='float32',
        decision_trees=False)

    # Write the dashboard config and dump the explainer next to it.
    db.to_yaml("dashboard.yaml",
               explainerfile="explainer.joblib",
               dump_explainer=True)

# Rebuild the dashboard from the saved "dashboard.yaml" config.
db = ExplainerDashboard.from_config("dashboard.yaml")
# Module-level `app` holds whatever `flask_server()` returns
# (presumably the underlying Flask app, for use by a WSGI server — confirm).
app = db.flask_server()
Esempio n. 6
0
from unittest.mock import MagicMock
import sys
# Register a MagicMock under the name "xgboost" BEFORE explainerdashboard is
# imported, so any later `import xgboost` resolves to the mock.
# NOTE(review): presumably so the real xgboost package does not have to be
# installed in the serving environment — confirm.
sys.modules["xgboost"] = MagicMock()

from explainerdashboard import RegressionExplainer, ExplainerDashboard

# Load the previously dumped explainer from disk.
explainer = RegressionExplainer.from_file("explainer.joblib")
# you can override params during load from_config:
db = ExplainerDashboard.from_config(explainer, "dashboard.yaml", title="Test")

# Module-level `app` is the object the serve command below refers to.
app = db.flask_server()

# Serve from the command line with: waitress-serve --port=8070 dashboard:app
Esempio n. 7
0
# Must be opened at http://127.0.0.1:8050/ to work; still to be determined whether it can work with Streamlit.

# Import libraries here
from sklearn.ensemble import RandomForestClassifier

from explainerdashboard import ClassifierExplainer, ExplainerDashboard
from explainerdashboard.datasets import titanic_survive, feature_descriptions

# Fit a random forest on the titanic survival data set.
X_train, y_train, X_test, y_test = titanic_survive()
model = RandomForestClassifier(n_estimators=50, max_depth=10)
model.fit(X_train, y_train)

# Wrap the fitted model and the test split in a classifier explainer.
explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

# Build the default dashboard and start serving it.
dashboard = ExplainerDashboard(explainer)
dashboard.run()
from explainerdashboard import RegressionExplainer, ExplainerDashboard
from explainerdashboard.custom import *
from joblib import load
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the fitted model and the encoded data set from disk
dec_tree = load('dec_tree_v3.joblib')
df_data = pd.read_csv('data_v3_enc.csv')

# Target is the 'Duration' column; features are everything except
# 'Duration' and 'Timestamp'
y = df_data['Duration']
X = df_data.drop(columns=['Duration', 'Timestamp'])

# Only the 1% test split (X_small / y_small) is handed to the explainer
Xt, X_small, yt, y_small = train_test_split(
    X, y, test_size=0.01, random_state=0)

categorical_features = ['Day_of_week', 'Hour', 'Vehicle', 'Position']
exp = RegressionExplainer(dec_tree, X_small, y_small,
                          cats=categorical_features)

# Dashboard with two composite tabs
dashboard_tabs = [ShapDependenceComposite, WhatIfComposite]
db = ExplainerDashboard(exp, dashboard_tabs, hide_whatifpdp=True)

# Persist the explainer and the dashboard config
exp.dump("explainer.joblib")
db.to_yaml("dashboard.yaml")
Esempio n. 9
0
                         n_jobs=8,
                         num_parallel_tree=1,
                         objective='reg:squarederror',
                         random_state=42,
                         reg_alpha=1,
                         reg_lambda=0,
                         scale_pos_weight=1.0,
                         seed=42,
                         subsample=0.6000000000000001,
                         tree_method='exact',
                         validate_parameters=1,
                         verbosity=None)

# Features and target are stored in separate CSV files; the first column of
# each file is used as the index.
X = pd.read_csv('X.csv', index_col=0)
Y = pd.read_csv('Y.csv', index_col=0)

# Fit the model and wrap it, together with the same data, in an explainer.
REmodel = model.fit(X, Y)
explainer = RegressionExplainer(
    REmodel, X, Y,
    cats=cats,
    descriptions=feature_descriptions,
    units="$",
)

dashboard = ExplainerDashboard(
    explainer,
    title='XGBoost Regression Model Explainer: Predicting House Prices',
    description=
    'This dashboard shows the inner workings of a fitted machine learning model, and explains its predictions.',
    shap_interaction=False,
    decision_trees=False,
)

# Listen on $PORT when it is set, otherwise on 5000.
listen_port = int(os.environ.get('PORT', 5000))
dashboard.run(port=listen_port)
 def test_load_config_explainer(self):
     """``from_config`` given an in-memory explainer and a yaml path should build a dashboard."""
     config_path = self.pkl_dir / "dashboard.yaml"
     loaded = ExplainerDashboard.from_config(self.explainer, config_path)
     self.assertIsInstance(loaded, ExplainerDashboard)
     
Esempio n. 11
0
    print('Incoming request')
    return Response('Hello')


# Every column currently in `d` is declared as a categorical feature.
feature_columns = list(d.columns)
model = CatBoostClassifier(cat_features=feature_columns)
print(feature_columns)
model.load_model("Diabetes (1).cbm")

# Move the "class" column out of `d` into its own single-column frame.
y = pd.DataFrame()
y["class"] = d.pop("class")

# Replace the labels by their factorized integer codes, then by 1 - code.
class_codes, _ = y["class"].factorize()
y["class"] = 1 - class_codes

# Dashboard is rebuilt from the saved yaml config.
db = ExplainerDashboard.from_config("dashboard.yaml")


def dashboard(request):
    """View callable: return the result of ``db.app.index()``.

    ``request`` is accepted to match the view signature but is not used.
    """
    dash_app = db.app
    return dash_app.index()


if __name__ == '__main__':
    # Listen on $PORT when it is set, otherwise on 2000.
    port = int(os.environ.get("PORT", 2000))

    # Register the `dashboard` view on the root URL and build the WSGI app.
    route_name = 'hello'
    with Configurator() as config:
        config.add_route(route_name, '/')
        config.add_view(dashboard, route_name=route_name)
        app = config.make_wsgi_app()

    serve(app, host='0.0.0.0', port=port)
Esempio n. 12
0
from explainerdashboard import ClassifierExplainer, RegressionExplainer, ExplainerDashboard
from explainerdashboard.datasets import *

pkl_dir = Path.cwd() / "pkls"
# Create the output directory up front so the dumps below do not fail when
# the script is run from a directory that has no "pkls" folder yet.
pkl_dir.mkdir(parents=True, exist_ok=True)

# classifier
X_train, y_train, X_test, y_test = titanic_survive()
model = RandomForestClassifier(n_estimators=50,
                               max_depth=5).fit(X_train, y_train)
clas_explainer = ClassifierExplainer(model,
                                     X_test,
                                     y_test,
                                     cats=['Sex', 'Deck', 'Embarked'],
                                     descriptions=feature_descriptions,
                                     labels=['Not survived', 'Survived'])
# NOTE(review): the dashboard object itself is discarded; presumably it is
# built only so the explainer computes its properties before the dump — confirm.
_ = ExplainerDashboard(clas_explainer)
clas_explainer.dump(pkl_dir / "clas_explainer.joblib")

# regression
X_train, y_train, X_test, y_test = titanic_fare()
model = RandomForestRegressor(n_estimators=50,
                              max_depth=5).fit(X_train, y_train)
reg_explainer = RegressionExplainer(model,
                                    X_test,
                                    y_test,
                                    cats=['Sex', 'Deck', 'Embarked'],
                                    descriptions=feature_descriptions,
                                    units="$")
# Same pattern as for the classifier above: build, discard, then dump.
_ = ExplainerDashboard(reg_explainer)
reg_explainer.dump(pkl_dir / "reg_explainer.joblib")
Esempio n. 13
0
from explainerdashboard.explainers import RandomForestRegressionExplainer
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
from explainerdashboard import ClassifierExplainer, ExplainerDashboard, RegressionExplainer
from explainerdashboard.datasets import titanic_survive, feature_descriptions
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np


imdb = pd.read_csv("Amélioration/Data/movies2.csv", encoding="latin-1")
# NOTE(review): the existing column names look like values of a data row
# (presumably the CSV has no header line, so its first record was consumed as
# the header) — confirm, and consider reading with header=None/names=...
imdb = imdb.rename(columns={
    '11:14': 'film',
    '7.2': 'metascore',
    'Crime': 'genre',
    'Greg Marcks': 'realisateur',
    'Henry Thomas': 'acteur_1',
    'Colin Hanks': 'acteur_2',
    '6000000': 'budget',
    '0': 'votes2',
    '0.1': 'vote',
    'Aug 12, 2005': 'date',
})
colonne = ['genre', 'acteur_1', 'acteur_2', 'realisateur']
# One-hot encode the categorical columns.
imdb = pd.get_dummies(imdb, columns=colonne)
# `columns=` already selects the axis, so no extra `axis=1` is needed.
imdb = imdb.drop(columns=['film', 'budget', 'votes2', 'vote', 'date'])


# Features: every column except the target. Target: 'metascore' as a 1-D
# Series (a one-column DataFrame makes scikit-learn warn about a
# column-vector y).
X = imdb.loc[:, imdb.columns != 'metascore']
y = imdb['metascore']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestRegressor().fit(X_train, y_train)
explainer = RegressionExplainer(model, X_test, y_test)

db = ExplainerDashboard(explainer, title="Metascore de film",
                        whatif=False,  # you can switch off tabs with bools
                        shap_interaction=False,
                        decision_trees=False)

# Run the dashboard configured above; previously a second, default
# dashboard was built and run, so the title and tab settings were ignored.
db.run()