def generate_assets():
    """Write the explainer/dashboard fixture files to ``<cwd>/tests/cli_assets``.

    Fits a small random forest (5 trees, depth 2) on the titanic survival
    data, wraps it in a ClassifierExplainer and an ExplainerDashboard, and
    writes ``explainer.joblib``, ``explainer.yaml`` and ``dashboard.yaml``.

    Returns:
        None
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    forest = RandomForestClassifier(n_estimators=5, max_depth=2)
    forest.fit(X_train, y_train)

    # 'Gender' is given as an explicit mapping to its encoded columns;
    # 'Deck' and 'Embarked' are passed by name only.
    categorical = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    explainer = ClassifierExplainer(
        forest, X_test, y_test,
        cats=categorical,
        labels=['Not survived', 'Survived'],
    )

    # Tabs are given three ways: an instance, a class, and a string name.
    tabs = [
        ShapDependenceComposite(explainer, title="Test Tab!"),
        ShapDependenceComposite,
        "importances",
    ]
    dashboard = ExplainerDashboard(explainer, tabs, title="Test Title!")

    assets_dir = Path.cwd() / "tests" / "cli_assets"
    explainer.dump(assets_dir / "explainer.joblib")
    explainer.to_yaml(assets_dir / "explainer.yaml")
    dashboard.to_yaml(
        assets_dir / "dashboard.yaml",
        explainerfile=str(assets_dir / "explainer.joblib"),
    )
def setUp(self):
    """Fit a small model and write explainer/dashboard files for the tests.

    Sets ``self.model``, ``self.explainer``, ``self.dashboard`` and
    ``self.pkl_dir``, and writes ``explainer.joblib``, ``explainer.yaml``
    and ``dashboard.yaml`` into ``<cwd>/tests/cli_assets``.
    """
    X_train, y_train, X_test, y_test = titanic_survive()

    self.model = RandomForestClassifier(n_estimators=5, max_depth=2)
    self.model.fit(X_train, y_train)

    self.explainer = ClassifierExplainer(
        self.model, X_test, y_test,
        cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
              'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'])

    # Tabs are given three ways: an instance, a class, and a string name.
    self.dashboard = ExplainerDashboard(
        self.explainer,
        [
            ShapDependenceTab(self.explainer, title="Test Tab!"),
            ShapDependenceTab,
            "importances",
        ],
        title="Test Title!")

    self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
    self.explainer.dump(self.pkl_dir / "explainer.joblib")
    self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
    self.dashboard.to_yaml(
        self.pkl_dir / "dashboard.yaml",
        explainerfile=str(self.pkl_dir / "explainer.joblib"))
class DashboardTests(unittest.TestCase):
    """Round-trip tests for ExplainerDashboard yaml export and from_config."""

    def setUp(self):
        """Fit a small model and write explainer/dashboard files for the tests.

        Sets ``self.model``, ``self.explainer``, ``self.dashboard`` and
        ``self.pkl_dir``, and writes ``explainer.joblib``, ``explainer.yaml``
        and ``dashboard.yaml`` into ``<cwd>/tests/cli_assets``.
        """
        X_train, y_train, X_test, y_test = titanic_survive()

        self.model = RandomForestClassifier(n_estimators=5, max_depth=2)
        self.model.fit(X_train, y_train)

        self.explainer = ClassifierExplainer(
            self.model, X_test, y_test,
            cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
                  'Deck', 'Embarked'],
            labels=['Not survived', 'Survived'])

        # Tabs are given three ways: an instance, a class, and a string name.
        self.dashboard = ExplainerDashboard(
            self.explainer,
            [
                ShapDependenceTab(self.explainer, title="Test Tab!"),
                ShapDependenceTab,
                "importances",
            ],
            title="Test Title!")

        self.pkl_dir = Path.cwd() / "tests" / "cli_assets"
        self.explainer.dump(self.pkl_dir / "explainer.joblib")
        self.explainer.to_yaml(self.pkl_dir / "explainer.yaml")
        self.dashboard.to_yaml(
            self.pkl_dir / "dashboard.yaml",
            explainerfile=str(self.pkl_dir / "explainer.joblib"))

    def test_yaml(self):
        """to_yaml() without arguments returns a string."""
        # Local is not called `yaml` so it cannot hide an imported yaml module.
        yaml_text = self.dashboard.to_yaml()
        self.assertIsInstance(yaml_text, str)

    def test_yaml_dict(self):
        """to_yaml(return_dict=True) returns a dict with a 'dashboard' key."""
        yaml_dict = self.dashboard.to_yaml(return_dict=True)
        self.assertIsInstance(yaml_dict, dict)
        self.assertIn("dashboard", yaml_dict)

    def test_load_config_joblib(self):
        """from_config accepts an explainer file path plus a yaml path."""
        db = ExplainerDashboard.from_config(
            self.pkl_dir / "explainer.joblib",
            self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)

    def test_load_config_yaml(self):
        """from_config accepts the dashboard yaml path on its own."""
        db = ExplainerDashboard.from_config(self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)

    def test_load_config_explainer(self):
        """from_config accepts an in-memory explainer plus a yaml path."""
        db = ExplainerDashboard.from_config(
            self.explainer, self.pkl_dir / "dashboard.yaml")
        self.assertIsInstance(db, ExplainerDashboard)
def dashboard_exp(model, X_data, y_data):
    """Build a RegressionExplainer dashboard for ``model`` and run it.

    The arguments are forwarded to ``RegressionExplainer``; the resulting
    explainer is shown in an ``ExplainerDashboard`` using the SANDSTONE
    bootstrap theme and the switches listed below, and ``.run()`` is called.

    Args:
        model: forwarded to RegressionExplainer as its first argument.
        X_data: forwarded to RegressionExplainer as its second argument.
        y_data: forwarded to RegressionExplainer as its third argument.
    """
    import dash_bootstrap_components as dbc
    from explainerdashboard import RegressionExplainer, ExplainerDashboard

    # Boolean switches passed alongside the explainer (same order as before).
    tab_switches = {
        "importances": True,
        "model_summary": False,
        "contributions": True,
        "whatif": True,
        "shap_dependence": False,
        "shap_interaction": False,
        "decision_trees": False,
    }

    # hide_* flags that carry no per-flag comment in this file.
    component_switches = {
        "hide_whatifindexselector": True,
        "hide_whatifprediction": True,
        "hide_inputeditor": False,
        "hide_whatifcontributiongraph": False,
        "hide_whatifcontributiontable": True,
        "hide_whatifpdp": False,
        "hide_predindexselector": True,
        "hide_predictionsummary": True,
        "hide_contributiongraph": False,
        "hide_pdp": False,
        "hide_contributiontable": True,
    }

    toggle_switches = {
        "hide_dropna": True,
        "hide_range": True,
        "hide_depth": True,
        "hide_sort": True,
        "hide_sample": True,        # hide sample size input on pdp component
        "hide_gridlines": True,     # hide gridlines on pdp component
        "hide_gridpoints": True,
        "hide_cats_sort": True,     # hide the sorting option for categorical features
        "hide_cutoff": True,        # hide cutoff selector on classification components
        "hide_percentage": True,    # hide percentage toggle on classification components
        "hide_log_x": True,         # hide x-axis logs toggle on regression plots
        "hide_log_y": True,         # hide y-axis logs toggle on regression plots
        "hide_ratio": True,         # hide the residuals type dropdown
        "hide_points": True,        # hide the show violin scatter markers toggle
        "hide_winsor": True,        # hide the winsorize input
        "hide_wizard": True,        # hide the wizard toggle in lift curve component
        "hide_star_explanation": True,
    }

    explainer = RegressionExplainer(model, X_data, y_data)
    dashboard = ExplainerDashboard(
        explainer,
        bootstrap=dbc.themes.SANDSTONE,
        **tab_switches,
        **component_switches,
        **toggle_switches,
    )
    dashboard.run()
# Load the fitted model. The file is opened in a `with` block so the handle is
# closed right after unpickling.
# NOTE: pickle can execute arbitrary code on load; only load trusted files.
with open(MODELS_DIR / 'general_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Target and features are both indexed by 'Ticker'; 'Date' is dropped.
y = pd.read_csv(DATA_DIR / 'general_target.csv',
                index_col=['Ticker']).drop(columns=['Date'])
X = pd.read_csv(DATA_DIR / 'general_features.csv',
                index_col=['Ticker']).drop(columns=['Date'])

# Dashboard Explainer is fussy about Column Names: strip the dots from the
# DataFrame columns and from the booster's feature names so the two agree.
# regex=False makes '.' a literal dot on every pandas version; read as a
# regular expression it would match any character.
X.columns = X.columns.str.replace('.', '', regex=False)
feature_names = model.get_booster().feature_names
feature_names = [x.replace('.', '') for x in feature_names]
model.get_booster().feature_names = feature_names

explainer = RegressionExplainer(model, X, y)

db = ExplainerDashboard(
    explainer,
    title="Stock Valuation Explainer",
    description=
    "Visit https://share.streamlit.io/gardnmi/fundamental-stock-prediction to see the model in use,",
    shap_interaction=False,
    precision='float32',
    decision_trees=False)

# Write the dashboard config together with the explainer, then rebuild the
# dashboard from those files.
db.to_yaml("dashboard.yaml",
           explainerfile="explainer.joblib",
           dump_explainer=True)

db = ExplainerDashboard.from_config("dashboard.yaml")

# Flask server object of the reloaded dashboard.
app = db.flask_server()
from unittest.mock import MagicMock
import sys

# Register a MagicMock under the module name "xgboost" BEFORE explainerdashboard
# is imported, so that any later `import xgboost` resolves to the mock.
# NOTE(review): presumably xgboost is not installed in the serving
# environment — confirm.
sys.modules["xgboost"] = MagicMock()

from explainerdashboard import RegressionExplainer, ExplainerDashboard

# Load the explainer from the joblib file in the working directory.
explainer = RegressionExplainer.from_file("explainer.joblib")

# you can override params during load from_config:
db = ExplainerDashboard.from_config(explainer, "dashboard.yaml", title="Test")

# Module-level `app` is what the server command below refers to.
app = db.flask_server()

# run waitress-serve --port=8070 dashboard:app in command line
# NOTE: the dashboard has to be opened at http://127.0.0.1:8050/ to work.
# Still to figure out whether it can work with streamlit or not.

# Import libraries here
from sklearn.ensemble import RandomForestClassifier
from explainerdashboard import ClassifierExplainer, ExplainerDashboard
from explainerdashboard.datasets import titanic_survive, feature_descriptions

# Titanic survival data, already split into train and test parts.
X_train, y_train, X_test, y_test = titanic_survive()

# Fit the classifier on the training split.
model = RandomForestClassifier(n_estimators=50, max_depth=10)
model.fit(X_train, y_train)

# The explainer is built on the test split.
explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

# Build the dashboard with default settings and run it.
dashboard = ExplainerDashboard(explainer)
dashboard.run()
from explainerdashboard import RegressionExplainer, ExplainerDashboard
from explainerdashboard.custom import *
from joblib import load
import pandas as pd
from sklearn.model_selection import train_test_split

# Load model & data
dec_tree = load('dec_tree_v3.joblib')
df_data = pd.read_csv('data_v3_enc.csv')

# Prepare & split data: 'Duration' is the target, and 'Timestamp' is left out
# of the features. Only the 1% split (X_small / y_small) is used below.
y = df_data['Duration']
X = df_data.drop(columns=['Duration', 'Timestamp'])
Xt, X_small, yt, y_small = train_test_split(
    X, y, test_size=0.01, random_state=0)

categorical_features = ['Day_of_week', 'Hour', 'Vehicle', 'Position']
exp = RegressionExplainer(dec_tree, X_small, y_small,
                          cats=categorical_features)

# Build
tabs = [ShapDependenceComposite, WhatIfComposite]
db = ExplainerDashboard(exp, tabs, hide_whatifpdp=True)

# Save
exp.dump("explainer.joblib")
db.to_yaml("dashboard.yaml")
n_jobs=8, num_parallel_tree=1, objective='reg:squarederror', random_state=42, reg_alpha=1, reg_lambda=0, scale_pos_weight=1.0, seed=42, subsample=0.6000000000000001, tree_method='exact', validate_parameters=1, verbosity=None) X, Y = pd.read_csv('X.csv', index_col=0), pd.read_csv('Y.csv', index_col=0) REmodel = model.fit(X, Y) explainer = RegressionExplainer(REmodel, X, Y, cats=cats, descriptions=feature_descriptions, units="$") ExplainerDashboard( explainer, title='XGBoost Regression Model Explainer: Predicting House Prices', description= 'This dashboard shows the inner workings of a fitted machine learning model, and explains its predictions.', shap_interaction=False, decision_trees=False).run(port=int(os.environ.get('PORT', 5000)))
def test_load_config_explainer(self):
    """from_config accepts an in-memory explainer plus the dashboard yaml path."""
    config_path = self.pkl_dir / "dashboard.yaml"
    loaded = ExplainerDashboard.from_config(self.explainer, config_path)
    self.assertIsInstance(loaded, ExplainerDashboard)
print('Incoming request') return Response('Hello') model = CatBoostClassifier(cat_features=list(d.columns)) print(list(d.columns)) model.load_model("Diabetes (1).cbm") y = pd.DataFrame() y["class"] = d["class"] d.pop("class") y["class"], _ = y["class"].factorize() y["class"] = 1 - y["class"] db = ExplainerDashboard.from_config("dashboard.yaml") def dashboard(request): return db.app.index() if __name__ == '__main__': port = int(os.environ.get("PORT", 2000)) with Configurator() as config: config.add_route('hello', '/') config.add_view(dashboard, route_name='hello') app = config.make_wsgi_app() serve(app, host='0.0.0.0', port=port)
from explainerdashboard import ClassifierExplainer, RegressionExplainer, ExplainerDashboard
from explainerdashboard.datasets import *

pkl_dir = Path.cwd() / "pkls"


def _build_dashboard_then_dump(explainer, filename):
    """Instantiate a dashboard for ``explainer``, then dump it into ``pkl_dir``.

    NOTE(review): the dashboard object itself is discarded; presumably it is
    built only so the explainer has computed what the dashboard needs before
    it is dumped — confirm.
    """
    ExplainerDashboard(explainer)
    explainer.dump(pkl_dir / filename)


# classifier
X_train, y_train, X_test, y_test = titanic_survive()
model = RandomForestClassifier(n_estimators=50, max_depth=5)
model.fit(X_train, y_train)
clas_explainer = ClassifierExplainer(
    model, X_test, y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)
_build_dashboard_then_dump(clas_explainer, "clas_explainer.joblib")

# regression
X_train, y_train, X_test, y_test = titanic_fare()
model = RandomForestRegressor(n_estimators=50, max_depth=5)
model.fit(X_train, y_train)
reg_explainer = RegressionExplainer(
    model, X_test, y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    units="$",
)
_build_dashboard_then_dump(reg_explainer, "reg_explainer.joblib")
from explainerdashboard.explainers import RandomForestRegressionExplainer
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
from explainerdashboard import ClassifierExplainer, ExplainerDashboard, RegressionExplainer
from explainerdashboard.datasets import titanic_survive, feature_descriptions
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

imdb = pd.read_csv("Amélioration/Data/movies2.csv", encoding="latin-1")

# Give the columns meaningful names.
# NOTE(review): the current names look like a first data record that was read
# as the header row — confirm against the CSV.
imdb = imdb.rename(columns={
    '11:14': 'film',
    '7.2': 'metascore',
    'Crime': 'genre',
    'Greg Marcks': 'realisateur',
    'Henry Thomas': 'acteur_1',
    'Colin Hanks': 'acteur_2',
    '6000000': 'budget',
    '0': 'votes2',
    '0.1': 'vote',
    'Aug 12, 2005': 'date',
})

# One-hot encode the categorical columns, then drop the columns not used as
# features.
colonne = ['genre', 'acteur_1', 'acteur_2', 'realisateur']
imdb = pd.get_dummies(imdb, columns=colonne)
imdb = imdb.drop(columns=['film', 'budget', 'votes2', 'vote', 'date'])

# Features are every column except the target. The target is selected as a
# 1-d Series (not a one-column DataFrame), which is the shape the regressor
# expects for y.
X = imdb.drop(columns='metascore')
y = imdb['metascore']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

model = RandomForestRegressor().fit(X_train, y_train)

explainer = RegressionExplainer(model, X_test, y_test)

db = ExplainerDashboard(explainer,
                        title="Metascore de film",
                        whatif=False,  # you can switch off tabs with bools
                        shap_interaction=False,
                        decision_trees=False)

# Run the dashboard configured above. Previously a second, default-configured
# ExplainerDashboard was created and run, so the title and the disabled tabs
# were ignored.
db.run()