def Explanation_Dashboard(global_explanation, model=None, x_train=None,
                          x_test=None, y_train=None, y_test=None,
                          explantion_data=None, design=None):
    """Launch the interpret-community ExplanationDashboard for a fitted pipeline.

    Parameters
    ----------
    global_explanation : explanation object passed straight through to the dashboard.
    model : fitted estimator; may be replaced by the best imported pipeline below.
    x_train, x_test : feature DataFrames (default to empty frames when omitted).
    y_train, y_test : target DataFrames (default to empty frames when omitted).
    explantion_data : 'Training' to visualize on the training split, anything
        else (including None) uses the test split.  NOTE: parameter name keeps
        the historical spelling for caller compatibility.
    design : forwarded to get_best_pipeline when imported model files exist.

    Returns
    -------
    A human-readable error string when a required input is missing; otherwise
    None (the dashboard is shown as a side effect).
    """
    # Fix for mutable default arguments: the original signature used
    # x_train=pd.DataFrame() etc., which creates shared module-level objects
    # at def time.  None sentinels preserve the observable behavior
    # ("... is missing" returned for omitted inputs) without the shared state.
    x_train = pd.DataFrame() if x_train is None else x_train
    x_test = pd.DataFrame() if x_test is None else x_test
    y_train = pd.DataFrame() if y_train is None else y_train
    y_test = pd.DataFrame() if y_test is None else y_test
    # When model files were imported, prefer the best stored pipeline and
    # its associated splits over the explicitly passed arguments.
    if globals_file.imported_model_files:
        model, x_train, x_test, y_train, y_test = get_best_pipeline(design)
    # Validate inputs; use identity comparison with None (PEP 8) rather than
    # `==`, which can misbehave on objects that override __eq__.
    if model is None:
        return "Exported Pipeline is missing"
    if x_train.empty:
        return "Training features is missing"
    if x_test.empty:
        return "Testing features is missing"
    if y_train.empty:
        return "Training Target is missing"
    if y_test.empty:
        return "Testing Target is missing"
    if explantion_data == 'Training':
        ExplanationDashboard(global_explanation, model,
                             dataset=x_train, true_y=y_train)
    else:
        ExplanationDashboard(global_explanation, model,
                             dataset=x_test, true_y=y_test)
def test_explanation_dashboard_many_columns(self):
    """Smoke-test the ExplanationDashboard against a very wide dataset."""
    # Synthesize a wide (2000-feature) classification problem.
    X, y = make_classification(n_features=2000)
    # Hold out 20% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    classes = np.unique(y_train).tolist()
    # Give every column a stable name so the dashboard can label features.
    feature_names = [f"col{idx}" for idx in range(X_train.shape[1])]
    X_train = pd.DataFrame(X_train, columns=feature_names)
    X_test = pd.DataFrame(X_test, columns=feature_names)
    # Train a simple KNN classifier to explain.
    knn = sklearn.neighbors.KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Build a mimic-based global explanation over the test split.
    explainer = MimicExplainer(knn, X_train, LGBMExplainableModel,
                               model_task=ModelTask.Classification)
    global_explanation = explainer.explain_global(X_test)
    # Rendering the dashboard must not raise despite the column count.
    ExplanationDashboard(explanation=global_explanation, model=knn,
                         dataset=X_test, true_y=y_test, classes=classes)
def Explanation_Dashboard(global_explanation, model, x_train, x_test, y_train, y_test, explantion_data=None):
    """Show the ExplanationDashboard on either the training or test split.

    Selects the training split when ``explantion_data == 'Training'``;
    any other value (including None) visualizes the test split.
    """
    # Pick the split once, then make a single dashboard call.
    if explantion_data == 'Training':
        features, labels = x_train, y_train
    else:
        features, labels = x_test, y_test
    ExplanationDashboard(global_explanation, model, dataset=features, true_y=labels)
def test_local_explanation(self, mimic_explainer):
    """Validate that ExplanationDashboard renders a *local* explanation."""
    # Load the breast-cancer dataset splits plus metadata.
    x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data()
    # Train the SVM model that will be explained.
    svm_model = create_sklearn_svm_classifier(x_train, y_train)
    # Build a surrogate (mimic) explainer around the SVM.
    surrogate = mimic_explainer(svm_model, x_train, LGBMExplainableModel,
                                features=feature_names, classes=target_names)
    local_explanation = surrogate.explain_local(x_test)
    # The dashboard must accept a local explanation without raising.
    ExplanationDashboard(local_explanation, svm_model, dataset=x_test, true_y=y_test)
def test_raw_timestamp_explanation(self, mimic_explainer):
    """Explain a pipeline whose raw features include timestamp columns."""
    # Load the claims data, parsing the two date columns as datetimes.
    df = retrieve_dataset(
        'insurance_claims.csv', na_values='?',
        parse_dates=['policy_bind_date', 'incident_date'])
    label = 'fraud_reported'
    df_y = df[label]
    df_X = df.drop(columns=label)
    x_train, x_test, y_train, y_test = train_test_split(
        df_X, df_y, test_size=0.2, random_state=7)
    # Partition columns by dtype: strings, datetimes, and numerics.
    str_cols = df_X.select_dtypes(
        exclude=[np.number, np.datetime64]).columns.tolist()
    dt_cols = df_X.select_dtypes(include=[np.datetime64]).columns.tolist()
    numeric_cols = df_X.select_dtypes(include=[np.number]).columns.tolist()
    # Per-column pipelines: impute+one-hot strings, impute+scale numerics,
    # and scale datetimes directly (exercising raw timestamp handling).
    transforms_list = []
    transforms_list.extend(
        (col,
         Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                         ('ohe', OneHotEncoder(sparse=False))]),
         [col])
        for col in str_cols)
    transforms_list.extend(
        (col,
         Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')),
                         ('scaler', StandardScaler())]),
         [col])
        for col in numeric_cols)
    transforms_list.extend(
        (col, Pipeline(steps=[('scaler', StandardScaler())]), [col])
        for col in dt_cols)
    transformations = ColumnTransformer(transforms_list)
    # Fit the model on the transformed features.
    x_train_transformed = transformations.fit_transform(x_train)
    model = create_lightgbm_classifier(x_train_transformed, y_train)
    # Explain against the *raw* frame, letting the explainer apply
    # the transformations itself.
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                transformations=transformations,
                                features=df_X.columns.tolist(),
                                model_task=ModelTask.Classification)
    explanation = explainer.explain_global(x_train)
    # Dashboard consumes a preprocess+model pipeline plus the raw data.
    dashboard_pipeline = Pipeline(steps=[('preprocess', transformations),
                                         ('model', model)])
    ExplanationDashboard(explanation, dashboard_pipeline,
                         dataset=x_train, true_y=y_train)
# Demo script tail: X, y, feature_names and classes are defined earlier in the file.
# Hold out 20% of the data for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
# Probability=True is required so downstream dashboards can show class probabilities.
clf = svm.SVC(gamma=0.001, C=100., probability=True)
model = clf.fit(x_train, y_train)
# Build a tabular explainer over the trained model.
explainer = TabularExplainer(model, x_train, features=feature_names, classes=classes)
# Global explanation over the whole test split.
global_explanation = explainer.explain_global(x_test)
# Local explanation for a single instance (the first test row).
instance_num = 0
local_explanation = explainer.explain_local(x_test[instance_num, :])
# Rank local importances for the class the model actually predicted.
prediction_value = clf.predict(x_test)[instance_num]
sorted_local_importance_values = local_explanation.get_ranked_local_values()[
    prediction_value]
sorted_local_importance_names = local_explanation.get_ranked_local_names()[
    prediction_value]
# Launch the interactive dashboards (blocking UI side effects).
ExplanationDashboard(global_explanation, model, dataset=x_test, true_y=y_test)
ModelPerformanceDashboard(model, dataset=x_test, true_y=y_test)
# Keep the process alive so the dashboard servers stay reachable.
input("Press Enter to continue...")